pax_global_header00006660000000000000000000000064150767302640014524gustar00rootroot0000000000000052 comment=31ad2ead53712e7b0b7ef140c45f0bdbc6d2d797 rocthrust-6.4.4/000077500000000000000000000000001507673026400135745ustar00rootroot00000000000000rocthrust-6.4.4/.azuredevops/000077500000000000000000000000001507673026400162215ustar00rootroot00000000000000rocthrust-6.4.4/.azuredevops/rocm-ci.yml000066400000000000000000000013671507673026400203040ustar00rootroot00000000000000resources: repositories: - repository: pipelines_repo type: github endpoint: ROCm name: ROCm/ROCm variables: - group: common - template: /.azuredevops/variables-global.yml@pipelines_repo trigger: batch: true branches: include: - develop - mainline paths: exclude: - .githooks - .github - .jenkins - doc - docs - '.*.y*ml' - '*.md' - LICENSE - NOTICES.txt pr: autoCancel: true branches: include: - develop - mainline paths: exclude: - .githooks - .github - .jenkins - doc - docs - '.*.y*ml' - '*.md' - LICENSE - NOTICES.txt drafts: false jobs: - template: ${{ variables.CI_COMPONENT_PATH }}/rocThrust.yml@pipelines_repo rocthrust-6.4.4/.clang-format000066400000000000000000000107361507673026400161560ustar00rootroot00000000000000# Style file for MLSE Libraries based on the modified rocBLAS style # Common settings BasedOnStyle: LLVM TabWidth: 2 IndentWidth: 2 UseTab: Never ColumnLimit: 120 # Other languages JavaScript, Proto --- Language: Cpp # http://releases.llvm.org/6.0.1/tools/clang/docs/ClangFormatStyleOptions.html#disabling-formatting-on-a-piece-of-code # int formatted_code; # // clang-format off # void unformatted_code ; # // clang-format on # void formatted_code_again; DisableFormat: false Standard: c++14 AccessModifierOffset: -2 AlignAfterOpenBracket: true AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignEscapedNewlinesLeft: true AlignOperands: true AllowAllArgumentsOnNextLine: true AlignTrailingComments: false AllowAllParametersOfDeclarationOnNextLine: true 
AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true AttributeMacros: [ 'THRUST_DEVICE', 'THRUST_FORCEINLINE', 'THRUST_HOST_DEVICE', 'THRUST_HOST', '_CCCL_DEVICE', '_CCCL_FORCEINLINE', '_CCCL_HOST_DEVICE', '_CCCL_HOST', 'THRUST_RUNTIME_FUNCTION', 'THRUST_DETAIL_KERNEL_ATTRIBUTES', ] BinPackArguments: false BinPackParameters: false # Configure each individual brace in BraceWrapping BreakBeforeBraces: Custom # Control of individual brace wrapping cases BraceWrapping: { AfterCaseLabel: 'false' AfterClass: 'true' AfterControlStatement: 'true' AfterEnum : 'true' AfterFunction : 'true' AfterNamespace : 'true' AfterStruct : 'true' AfterUnion : 'true' BeforeCatch : 'true' BeforeElse : 'true' IndentBraces : 'false' SplitEmptyFunction: 'false' SplitEmptyRecord: 'false' } BreakBeforeConceptDeclarations: true BreakBeforeBinaryOperators: NonAssignment BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeComma BreakInheritanceList: BeforeComma EmptyLineAfterAccessModifier: Never EmptyLineBeforeAccessModifier: Always InsertBraces: true InsertNewlineAtEOF: true InsertTrailingCommas: Wrapped IndentRequires: true IndentPPDirectives: AfterHash PackConstructorInitializers: Never PenaltyBreakAssignment: 30 PenaltyBreakTemplateDeclaration: 0 PenaltyIndentedWhitespace: 2 RemoveSemicolon: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: true SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeRangeBasedForLoopColon: true CommentPragmas: '^ IWYU pragma:' CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 2 Cpp11BracedListStyle: true 
SpaceBeforeCpp11BracedList: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IndentCaseLabels: true FixNamespaceComments: true IndentWrappedFunctionNames: false KeepEmptyLinesAtTheStartOfBlocks: false MacroBlockBegin: '' MacroBlockEnd: '' #JavaScriptQuotes: Double MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBlockIndentWidth: 4 #ObjCSpaceAfterProperty: true #ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 50 PenaltyBreakComment: 0 PenaltyBreakFirstLessLess: 0 PenaltyBreakString: 70 PenaltyExcessCharacter: 100 PenaltyReturnTypeOnItsOwnLine: 90 PointerAlignment: Left SpaceAfterCStyleCast: true SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: Never SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false #SpaceAfterTemplateKeyword: true #SpaceBeforeInheritanceColon: true #SortUsingDeclarations: true SortIncludes: CaseInsensitive ReflowComments: true #IncludeBlocks: Preserve #IndentPPDirectives: AfterHash StatementMacros: [ 'THRUST_EXEC_CHECK_DISABLE', 'THRUST_NAMESPACE_BEGIN', 'THRUST_NAMESPACE_END', 'THRUST_EXEC_CHECK_DISABLE', 'CUB_NAMESPACE_BEGIN', 'CUB_NAMESPACE_END', 'THRUST_NAMESPACE_BEGIN', 'THRUST_NAMESPACE_END', '_LIBCUDACXX_BEGIN_NAMESPACE_STD', '_LIBCUDACXX_END_NAMESPACE_STD', ] TabWidth: 2 UseTab: Never --- rocthrust-6.4.4/.git-blame-ignore-revs000066400000000000000000000006721507673026400177010ustar00rootroot00000000000000# Exclude these commits from git-blame and similar tools. 
# # To use this file, run the following command from the repo root: # # ``` # $ git config blame.ignoreRevsFile .git-blame-ignore-revs # ``` # # Include a brief comment with each commit added, for example: # # ``` # d92d9f8baac5ec48a8f8718dd69f415a45efe372 # Initial clang-format # ``` # # Only add commits that are pure formatting changes (e.g. # clang-format version changes, etc). rocthrust-6.4.4/.gitattributes000066400000000000000000000001531507673026400164660ustar00rootroot00000000000000*.pdf binary *.doc binary *.docx binary *.ppt binary *.pptx binary *.xls binary *.xlsx binary *.xps binary rocthrust-6.4.4/.githooks/000077500000000000000000000000001507673026400155015ustar00rootroot00000000000000rocthrust-6.4.4/.githooks/install000077500000000000000000000002121507673026400170700ustar00rootroot00000000000000#!/bin/sh cd "$(git rev-parse --git-dir)" cd hooks echo "Installing hooks..." ln -s ../../.githooks/pre-commit pre-commit echo "Done!" rocthrust-6.4.4/.githooks/pre-commit000077500000000000000000000012261507673026400175040ustar00rootroot00000000000000#!/bin/sh # Redirect output to stderr. exec 1>&2 check_failed=false # Do the copyright check # update & apply copyright when hook config is set, otherwise just verify opts="-qc" if [ "$(git config --get --type bool --default false hooks.updateCopyright)" = "true" ]; then opts="-qca" fi if ! "$(git rev-parse --show-toplevel)/scripts/copyright-date/check-copyright.sh" "$opts" 1>&2; then printf "\n\033[31mFailed\033[0m: copyright date check.\n" check_failed=true fi if $check_failed; then printf " Pre-commit check failed, please fix the reported errors. 
Note: Use '\033[33mgit commit --no-verify\033[0m' to bypass checks.\n" exit 1 fi rocthrust-6.4.4/.github/000077500000000000000000000000001507673026400151345ustar00rootroot00000000000000rocthrust-6.4.4/.github/CODEOWNERS000077500000000000000000000003251507673026400165320ustar00rootroot00000000000000* @stanleytsang-amd @umfranzw @RobsonRLemos @lawruble13 # Documentation files docs/ @ROCm/rocm-documentation *.md @ROCm/rocm-documentation *.rst @ROCm/rocm-documentation .readthedocs.yaml @ROCm/rocm-documentation rocthrust-6.4.4/.github/dependabot.yml000066400000000000000000000012231507673026400177620ustar00rootroot00000000000000# To get started with Dependabot version updates, you'll need to specify which # package ecosystems to update and where the package manifests are located. # Please see the documentation for all configuration options: # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates version: 2 updates: - package-ecosystem: "pip" # See documentation for possible values directory: "/docs/sphinx" # Location of package manifests open-pull-requests-limit: 10 schedule: interval: "daily" labels: - "documentation" - "dependencies" - "ci:docs-only" reviewers: - "samjwu" rocthrust-6.4.4/.github/workflows/000077500000000000000000000000001507673026400171715ustar00rootroot00000000000000rocthrust-6.4.4/.github/workflows/docs.yaml000066400000000000000000000045551507673026400210160ustar00rootroot00000000000000name: Upload to the upload server # Controls when the workflow will run on: push: branches: [develop, master] tags: - rocm-5.* release: types: [published] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: # This workflow contains a single job called "build" build: # The type of runner that the job will run on runs-on: ubuntu-latest # Steps represent a sequence of tasks that will be executed as 
part of the job steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v2 - name: getting branch name shell: bash run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" id: branch_name - name: getting tag name shell: bash run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME})" id: tag_name - name: zipping files run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . -x '*.git*' '*.idea*' - name: echo-step run: echo "${{ github.event.release.target_commitish }}" - name: uploading archive to prod if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.PROD_UPLOAD_URL }}' args: '-o ConnectTimeout=5' - name: uploading archive to staging if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.STG_UPLOAD_URL }}' args: '-o ConnectTimeout=5' rocthrust-6.4.4/.gitignore000066400000000000000000000012521507673026400155640ustar00rootroot00000000000000### Build dirs ### build/ # Created by https://www.gitignore.io/api/c++,cmake ### C++ ### # Prerequisites *.d # Compiled Object files *.slo *.lo *.o *.obj # Precompiled Headers *.gch *.pch # Compiled Dynamic libraries *.so *.dylib *.dll # Fortran module files *.mod *.smod # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app ### CMake ### CMakeCache.txt CMakeFiles 
CMakeScripts Makefile cmake_install.cmake install_manifest.txt compile_commands.json CTestTestfile.cmake thrust/system/cuda/detail/.gitignore *.bash run build* discrete_voronoi.pgm # End of https://www.gitignore.io/api/c++,cmake .vscode docs/_build/doctrees/environment.pickle rocthrust-6.4.4/.gitlab-ci.yml000066400000000000000000000326531507673026400162410ustar00rootroot00000000000000# ######################################################################## # Copyright 2019-2024 Advanced Micro Devices, Inc. # ######################################################################## include: - project: 'amd/ci-templates' ref: main file: - /defaults.yaml - /deps-cmake.yaml - /deps-docs.yaml - /deps-rocm.yaml - /deps-windows.yaml - /deps-nvcc.yaml - /deps-compiler-acceleration.yaml - /gpus-rocm.yaml - /gpus-nvcc.yaml - /rules.yaml stages: - lint - build # Tests if builds succeed (CMake) - test # Tests if unit tests are passing (CTest) - benchmark # Runs the non-internal benchmarks (Google Benchmark) variables: # Helper variables PACKAGE_DIR: $BUILD_DIR/package ROCPRIM_GIT_BRANCH: develop_stream ROCPRIM_DIR: ${CI_PROJECT_DIR}/rocPRIM copyright-date: extends: - .deps:rocm stage: lint needs: [] tags: - build rules: - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' script: - cd $CI_PROJECT_DIR - git config --global --add safe.directory $CI_PROJECT_DIR - scripts/copyright-date/check-copyright.sh -v -d $CI_MERGE_REQUEST_DIFF_BASE_SHA .cmake-latest: extends: - .deps:rocm - .deps:cmake-latest - .deps:compiler-acceleration before_script: - !reference [".deps:rocm", before_script] - !reference [".deps:cmake-latest", before_script] - !reference [".deps:compiler-acceleration", before_script] .cmake-minimum: extends: - .deps:rocm - .deps:cmake-minimum - .deps:compiler-acceleration before_script: - !reference [".deps:rocm", before_script] - !reference [".deps:cmake-minimum", before_script] - !reference [".deps:compiler-acceleration", before_script] .install-rocprim: script: - 
branch_name="$ROCPRIM_GIT_BRANCH" - if [[ $CI_COMMIT_BRANCH == "develop" ]] || [[ $CI_COMMIT_BRANCH == "master" ]]; then branch_name=$CI_COMMIT_BRANCH; - fi; - git clone -b $branch_name https://gitlab-ci-token:${CI_JOB_TOKEN}@${ROCPRIM_GIT_URL} $ROCPRIM_DIR - cmake -G Ninja -D CMAKE_CXX_COMPILER=hipcc -D CMAKE_BUILD_TYPE=Release -D BUILD_TEST=OFF -D BUILD_HIPSTDPAR_TEST=OFF -D BUILD_EXAMPLE=OFF -D ROCM_DEP_ROCMCORE=OFF -D CMAKE_C_COMPILER_LAUNCHER=phc_sccache_c -D CMAKE_CXX_COMPILER_LAUNCHER=phc_sccache_cxx -S $ROCPRIM_DIR -B $ROCPRIM_DIR/build - cd $ROCPRIM_DIR/build - cpack -G "DEB" - $SUDO_CMD dpkg -i rocprim*.deb .build:common: stage: build tags: - build extends: - .gpus:rocm-gpus - .rules:build variables: EXTRA_CMAKE_CXX_FLAGS: "" script: - !reference [.install-rocprim, script] - | # Setup env vars for testing rng_seed_count=0; prng_seeds="0"; if [[ $CI_COMMIT_BRANCH == "develop_stream" ]]; then rng_seed_count=3 prng_seeds="0, 1000" fi - | # Add hardened libc++ assertions for tests only if [[ $BUILD_TARGET == "TEST" ]]; then echo "Configuring with hardened libc++!" 
EXTRA_CMAKE_CXX_FLAGS+=" -D_GLIBCXX_ASSERTIONS=ON" fi # Build rocThrust - cmake -G Ninja -D CMAKE_CXX_COMPILER=hipcc -D CMAKE_CXX_FLAGS="$EXTRA_CMAKE_CXX_FLAGS" -D CMAKE_BUILD_TYPE=$BUILD_TYPE -D BUILD_$BUILD_TARGET=ON -D GPU_TARGETS=$GPU_TARGETS -D AMDGPU_TEST_TARGETS=$GPU_TARGETS -D RNG_SEED_COUNT=$rng_seed_count -D PRNG_SEEDS=$prng_seeds -D CMAKE_C_COMPILER_LAUNCHER=phc_sccache_c -D CMAKE_CXX_COMPILER_LAUNCHER=phc_sccache_cxx -D CMAKE_CUDA_COMPILER_LAUNCHER=phc_sccache_cuda -D CMAKE_CXX_STANDARD="$BUILD_VERSION" -S $CI_PROJECT_DIR -B $CI_PROJECT_DIR/build - cmake --build $CI_PROJECT_DIR/build artifacts: paths: - $CI_PROJECT_DIR/build/benchmarks/* - $CI_PROJECT_DIR/build/test/* - $CI_PROJECT_DIR/build/testing/* - $CI_PROJECT_DIR/build/deps/* - $CI_PROJECT_DIR/build/CMakeCache.txt - $CI_PROJECT_DIR/build/CTestTestfile.cmake - $CI_PROJECT_DIR/build/.ninja_log exclude: - $CI_PROJECT_DIR/build/**/*.o expire_in: 2 weeks build:cmake-latest: stage: build extends: - .cmake-latest - .build:common parallel: matrix: - BUILD_TYPE: Release BUILD_TARGET: [BENCHMARKS, TEST, EXAMPLES] BUILD_VERSION: [14, 17] build:cmake-minimum: stage: build extends: - .cmake-minimum - .build:common parallel: matrix: - BUILD_TYPE: Release BUILD_TARGET: [BENCHMARKS, TEST, EXAMPLES] BUILD_VERSION: 14 build:package: stage: build extends: - .cmake-minimum - .rules:build tags: - build script: - !reference [.install-rocprim, script] - cmake -S $CI_PROJECT_DIR -B $PACKAGE_DIR -G Ninja -D CMAKE_BUILD_TYPE=Release -D CMAKE_CXX_COMPILER=hipcc - cd $PACKAGE_DIR - cpack -G "DEB;ZIP" artifacts: paths: - $PACKAGE_DIR/rocthrust*.deb - $PACKAGE_DIR/rocthrust*.zip expire_in: 2 weeks build:windows: stage: build needs: [] extends: - .rules:build - .gpus:rocm-windows - .deps:rocm-windows - .deps:visual-studio-devshell script: # Download, configure, and install rocPRIM - $BRANCH_NAME=$ROCPRIM_GIT_BRANCH - if ( $CI_COMMIT_BRANCH -eq "develop" -or $CI_COMMIT_BRANCH -eq "master" ) { $branch_name=$CI_COMMIT_BRANCH } - 
git clone -b $BRANCH_NAME https://gitlab-ci-token:$CI_JOB_TOKEN@$ROCPRIM_GIT_URL $ROCPRIM_DIR - \& cmake -S "$ROCPRIM_DIR" -B "$ROCPRIM_DIR/build" -G Ninja -D CMAKE_BUILD_TYPE=Release -D GPU_TARGETS=$GPU_TARGET -D BUILD_TEST=OFF -D BUILD_EXAMPLE=OFF -D BUILD_BENCHMARK=OFF -D BUILD_SHARED_LIBS=$BUILD_SHARED_LIBS -D CMAKE_CXX_COMPILER:FILEPATH="${env:HIP_PATH}/bin/clang++.exe" -D CMAKE_INSTALL_PREFIX:PATH="$ROCPRIM_DIR/build/install" *>&1 - \& cmake --build "$ROCPRIM_DIR/build" --target install *>&1 # Configure and build rocThrust - \& cmake -S "$CI_PROJECT_DIR" -B "$CI_PROJECT_DIR/build" -G Ninja -D CMAKE_BUILD_TYPE=Release -D GPU_TARGETS=$GPU_TARGET -D BUILD_TEST=ON -D BUILD_EXAMPLES=OFF -D BUILD_BENCHMARKS=OFF -D CMAKE_CXX_FLAGS=-Wno-deprecated-declarations -D CMAKE_CXX_COMPILER:FILEPATH="${env:HIP_PATH}/bin/clang++.exe" -D CMAKE_INSTALL_PREFIX:PATH="$CI_PROJECT_DIR/build/install" -D CMAKE_CXX_STANDARD=14 -D CMAKE_PREFIX_PATH:PATH="$ROCPRIM_DIR/build/install;${env:HIP_PATH}" *>&1 - \& cmake --build "$CI_PROJECT_DIR/build" *>&1 artifacts: paths: - $CI_PROJECT_DIR/build/ - $ROCPRIM_DIR/build/install expire_in: 2 weeks test:package: stage: test needs: - build:package extends: - .cmake-minimum - .rules:test tags: - rocm script: - !reference [.install-rocprim, script] - $SUDO_CMD dpkg -i $PACKAGE_DIR/rocthrust*.deb # Test install - cmake -S $CI_PROJECT_DIR/extra -B $CI_PROJECT_DIR/package_test -G Ninja -D CMAKE_CXX_COMPILER=hipcc -D CMAKE_BUILD_TYPE=Release -D GPU_TARGETS=$GPU_TARGETS -D ROCPRIM_ROOT=/opt/rocm/rocprim - cmake --build $CI_PROJECT_DIR/package_test - cd $CI_PROJECT_DIR/package_test - ctest --output-on-failure # Remove rocPRIM and rocThrust - $SUDO_CMD dpkg -r rocthrust-dev - $SUDO_CMD dpkg -r rocprim-dev test:doc: stage: test variables: SPHINX_DIR: $DOCS_DIR/sphinx extends: - .build:docs - .rules:test test: stage: test extends: - .cmake-minimum - .rules:test - .gpus:rocm needs: - job: build:cmake-minimum parallel: matrix: - BUILD_TYPE: Release 
BUILD_TARGET: TEST BUILD_VERSION: 14 script: - cd $CI_PROJECT_DIR/build - cmake -D CMAKE_PREFIX_PATH=/opt/rocm -P $CI_PROJECT_DIR/cmake/GenerateResourceSpec.cmake - cat ./resources.json # Parallel execution (with other AMDGPU processes) can oversubscribe the SDMA queue. # This causes the hipMemcpy to fail, which is not reported as an error by HIP. # As a temporary workaround, disable the SDMA for test stability. - HSA_ENABLE_SDMA=0 ctest --output-on-failure --repeat-until-fail 2 --tests-regex $GPU_TARGET --resource-spec-file ./resources.json --parallel $PARALLEL_JOBS .rocm-windows:test: extends: - .gpus:rocm-windows - .rules:test stage: test script: - \& ctest --test-dir "$CI_PROJECT_DIR/build" --output-on-failure --no-tests=error *>&1 test:rocm-windows: extends: - .rocm-windows:test needs: - build:windows .rocm-windows:test-install: extends: - .deps:rocm-windows - .deps:visual-studio-devshell - .gpus:rocm-windows - .rules:test stage: test script: - \& cmake --build "$CI_PROJECT_DIR/build" --target install *>&1 - \& cmake -G Ninja -S "$CI_PROJECT_DIR/extra" -B "$CI_PROJECT_DIR/build/package_test" -D CMAKE_BUILD_TYPE=Release -D GPU_TARGETS=$GPU_TARGET -D CMAKE_CXX_COMPILER:FILEPATH="${env:HIP_PATH}/bin/clang++.exe" -D CMAKE_PREFIX_PATH:PATH="$ROCPRIM_DIR/build/install;${env:HIP_PATH}" *>&1 - \& cmake --build "$CI_PROJECT_DIR/build/package_test" *>&1 - \& ctest --test-dir "$CI_PROJECT_DIR/build/package_test" --output-on-failure --no-tests=error *>&1 test:rocm-windows-install: extends: - .rocm-windows:test-install needs: - build:windows .nvcc: extends: - .deps:nvcc - .gpus:nvcc-gpus - .deps:cmake-latest - .deps:compiler-acceleration - .rules:manual before_script: - !reference [".deps:nvcc", before_script] - !reference [".deps:cmake-latest", before_script] - !reference [".deps:compiler-acceleration", before_script] build:cuda-and-omp: stage: build extends: - .nvcc - .rules:build tags: - build variables: CCCL_GIT_BRANCH: v2.5.0 CCCL_DIR: ${CI_PROJECT_DIR}/cccl needs: [] 
script: - git clone -b $CCCL_GIT_BRANCH https://github.com/NVIDIA/cccl.git $CCCL_DIR # Replace CCCL Thrust headers with rocThrust headers - rm -R $CCCL_DIR/thrust/thrust - cp -r $CI_PROJECT_DIR/thrust $CCCL_DIR/thrust # Build tests and examples from CCCL Thrust - cmake -G Ninja -D CMAKE_BUILD_TYPE=Release -D CMAKE_CUDA_ARCHITECTURES="$GPU_TARGETS" -D THRUST_ENABLE_TESTING=ON -D THRUST_ENABLE_EXAMPLES=ON -D THRUST_ENABLE_BENCHMARKS=OFF -D THRUST_ENABLE_MULTICONFIG=ON -D THRUST_MULTICONFIG_ENABLE_SYSTEM_OMP=ON -D THRUST_MULTICONFIG_ENABLE_SYSTEM_CUDA=ON -D CMAKE_C_COMPILER_LAUNCHER=phc_sccache_c -D CMAKE_CXX_COMPILER_LAUNCHER=phc_sccache_cxx -D CMAKE_CUDA_COMPILER_LAUNCHER=phc_sccache_cuda -B $CI_PROJECT_DIR/build -S $CCCL_DIR/thrust - cmake --build $CI_PROJECT_DIR/build - cd $CI_PROJECT_DIR/build - ctest --output-on-failure --tests-regex "thrust.example.cmake.add_subdir|thrust.test.cmake.check_source_files" artifacts: paths: - $CI_PROJECT_DIR/build/bin/ - $CI_PROJECT_DIR/build/CMakeCache.txt - $CI_PROJECT_DIR/build/examples/cuda/CTestTestfile.cmake - $CI_PROJECT_DIR/build/examples/CTestTestfile.cmake - $CI_PROJECT_DIR/build/testing/unittest/CTestTestfile.cmake - $CI_PROJECT_DIR/build/testing/async/CTestTestfile.cmake - $CI_PROJECT_DIR/build/testing/omp/CTestTestfile.cmake - $CI_PROJECT_DIR/build/testing/cuda/CTestTestfile.cmake - $CI_PROJECT_DIR/build/testing/regression/CTestTestfile.cmake - $CI_PROJECT_DIR/build/testing/cpp/CTestTestfile.cmake - $CI_PROJECT_DIR/build/testing/CTestTestfile.cmake - $CI_PROJECT_DIR/build/CTestTestfile.cmake - $CCCL_DIR/thrust/cmake/ThrustRunTest.cmake - $CCCL_DIR/thrust/cmake/ThrustRunExample.cmake - $CI_PROJECT_DIR/build/.ninja_log expire_in: 1 week test:cuda-and-omp: stage: test needs: - build:cuda-and-omp extends: - .nvcc - .gpus:nvcc - .rules:test before_script: # This is only needed because of the legacy before_script in .gpus:nvcc would otherwise overwrite before_script - !reference [.nvcc, before_script] script: - cd 
$CI_PROJECT_DIR/build # These tests are executed on the build stage because they require sources - ctest --output-on-failure --exclude-regex "thrust.example.cmake.add_subdir|thrust.test.cmake.check_source_files" .benchmark-base: stage: benchmark extends: - .rules:benchmark variables: BENCHMARK_RESULT_DIR: ${CI_PROJECT_DIR}/benchmark_results BENCHMARK_RESULT_CACHE_DIR: ${BENCHMARK_RESULT_DIR}_cache benchmark: needs: - build:cmake-minimum extends: - .cmake-minimum - .gpus:rocm - .benchmark-base variables: BENCHMARK_FILENAME_REGEX: ^benchmark BENCHMARK_ALGORITHM_REGEX: "" timeout: 3h script: - 'printf "CI Variables used in benchmarks:\nBENCHMARK_RESULT_DIR: %s\nBENCHMARK_FILENAME_REGEX: %s\nBENCHMARK_ALGORITHM_REGEX: %s \n" "$BENCHMARK_RESULT_DIR" "$BENCHMARK_FILENAME_REGEX" "$BENCHMARK_ALGORITHM_REGEX"' - cd "${CI_PROJECT_DIR}" - mkdir -p "${BENCHMARK_RESULT_DIR}" - python3 .gitlab/run_benchmarks.py --benchmark_dir "${CI_PROJECT_DIR}/build/benchmarks" --benchmark_gpu_architecture "${GPU_TARGET}" --benchmark_output_dir "${BENCHMARK_RESULT_DIR}" --benchmark_filename_regex "${BENCHMARK_FILENAME_REGEX}" --benchmark_filter_regex "${BENCHMARK_ALGORITHM_REGEX}" artifacts: paths: - ${BENCHMARK_RESULT_DIR} expire_in: 1 week rocthrust-6.4.4/.gitlab/000077500000000000000000000000001507673026400151145ustar00rootroot00000000000000rocthrust-6.4.4/.gitlab/run_benchmarks.py000077500000000000000000000116021507673026400204720ustar00rootroot00000000000000#!/usr/bin/env python3 # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
import argparse from collections import namedtuple import os import re import stat import subprocess import sys import time BenchmarkContext = namedtuple('BenchmarkContext', ['gpu_architecture', 'benchmark_output_dir', 'benchmark_dir', 'benchmark_filename_regex', 'benchmark_filter_regex', 'seed']) def run_benchmarks(benchmark_context): def is_benchmark_executable(filename): if not re.match(benchmark_context.benchmark_filename_regex, filename): return False path = os.path.join(benchmark_context.benchmark_dir, filename) st_mode = os.stat(path).st_mode # we are not interested in permissions, just whether there is any execution flag set # and it is a regular file (S_IFREG) return (st_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)) and (st_mode & stat.S_IFREG) success = True benchmark_names = [name for name in os.listdir(benchmark_context.benchmark_dir) if is_benchmark_executable(name)] benchmark_names.sort() print('The following benchmarks will be ran:\n{}'.format('\n'.join(benchmark_names)), file=sys.stderr, flush=True) for benchmark_name in benchmark_names: results_json_name = f'{benchmark_name}_{benchmark_context.gpu_architecture}.json' benchmark_path = os.path.join(benchmark_context.benchmark_dir, benchmark_name) results_json_path = os.path.join(benchmark_context.benchmark_output_dir, results_json_name) args = [ benchmark_path, '--name_format', 'json', '--benchmark_out_format=json', f'--benchmark_out={results_json_path}', f'--benchmark_filter={benchmark_context.benchmark_filter_regex}' ] if benchmark_context.seed: args += ['--seed', benchmark_context.seed] try: start_time = time.time() subprocess.check_call(args) end_time = time.time() duration = end_time - start_time print(f'Benchmark {benchmark_name} took {duration:.3f} seconds to run', file=sys.stderr, flush=True) except subprocess.CalledProcessError as error: print(f'Could not run benchmark at {benchmark_path}. 
Error: "{error}"', file=sys.stderr, flush=True) success = False return success def main(): parser = argparse.ArgumentParser() parser.add_argument('--benchmark_dir', help='The local directory that contains the benchmark executables', required=True) parser.add_argument('--benchmark_gpu_architecture', help='The architecture of the currently enabled GPU', required=True) parser.add_argument('--benchmark_output_dir', help='The directory to write the benchmarks to', required=True) parser.add_argument('--benchmark_filename_regex', help='Regular expression that controls the list of benchmark executables to run', default=r'^benchmark', required=False) parser.add_argument('--benchmark_filter_regex', help='Regular expression that controls the list of benchmarks to run in each benchmark executable', default='', required=False) parser.add_argument('--seed', help='Controls the seed for random number generation for each benchmark case', default='', required=False) args = parser.parse_args() benchmark_context = BenchmarkContext( args.benchmark_gpu_architecture, args.benchmark_output_dir, args.benchmark_dir, args.benchmark_filename_regex, args.benchmark_filter_regex, args.seed) benchmark_run_successful = run_benchmarks(benchmark_context) return benchmark_run_successful if __name__ == '__main__': success = main() if success: exit(0) else: exit(1) rocthrust-6.4.4/.jenkins/000077500000000000000000000000001507673026400153135ustar00rootroot00000000000000rocthrust-6.4.4/.jenkins/common.groovy000066400000000000000000000055471507673026400200650ustar00rootroot00000000000000// This file is for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. def runCompileCommand(platform, project, jobName, boolean debug=false, boolean sameOrg=true) { project.paths.construct_build_prefix() String buildTypeArg = debug ? '-DCMAKE_BUILD_TYPE=Debug' : '-DCMAKE_BUILD_TYPE=Release' String buildTypeDir = debug ? 
'debug' : 'release' String cmake = platform.jenkinsLabel.contains('centos') ? 'cmake3' : 'cmake' //Set CI node's gfx arch as target if PR, otherwise use default targets of the library String amdgpuTargets = env.BRANCH_NAME.startsWith('PR-') ? '-DAMDGPU_TARGETS=\$gfx_arch' : '' def getDependenciesCommand = "" if (project.installLibraryDependenciesFromCI) { project.libraryDependencies.each { libraryName -> getDependenciesCommand += auxiliary.getLibrary(libraryName, platform.jenkinsLabel, 'develop', sameOrg) } } def command = """#!/usr/bin/env bash set -x ${getDependenciesCommand} cd ${project.paths.project_build_prefix} mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir} ${auxiliary.gfxTargetParser()} ${cmake} --toolchain=toolchain-linux.cmake ${buildTypeArg} ${amdgpuTargets} -DBUILD_TEST=ON -DBUILD_BENCHMARK=ON ../.. make -j\$(nproc) """ platform.runCommand(this, command) } def runTestCommand (platform, project) { String sudo = auxiliary.sudo(platform.jenkinsLabel) def testCommand = "ctest --output-on-failure" def hmmTestCommand = '' // Note: temporarily disable scan tests below while waiting for a compiler fix def excludeRegex = /(reduce_by_key.hip|scan)/ testCommandExclude = "--exclude-regex \"${excludeRegex}\"" if (platform.jenkinsLabel.contains('gfx90a')) { hmmTestCommand = "" // temporarily disable hmm testing // """ // export HSA_XNACK=1 // export ROCTHRUST_USE_HMM=1 // ${testCommand} ${testCommandExclude} // """ } def command = """ #!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix} cd ${project.testDirectory} ${testCommand} ${testCommandExclude} ${hmmTestCommand} """ platform.runCommand(this, command) } def runPackageCommand(platform, project) { def packageHelper = platform.makePackage(platform.jenkinsLabel,"${project.paths.project_build_prefix}/build/release") platform.runCommand(this, packageHelper[0]) platform.archiveArtifacts(this, packageHelper[1]) } return this 
rocthrust-6.4.4/.jenkins/precheckin.groovy000066400000000000000000000055571507673026400207110ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This file is for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path; def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocThrust', 'precheckin') prj.defaults.ccache = true prj.timeout.compile = 420 prj.libraryDependencies = ["rocPRIM"] // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])], "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) Set standardJobNameSet = ["compute-rocm-dkms-no-npi", "compute-rocm-dkms-no-npi-hipclang", "rocm-docker"] def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "rocm-docker":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']])] 
jobNameList = auxiliary.appendJobNameList(jobNameList) auxiliary.registerDependencyBranchParameter(["rocPRIM"]) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } Set seenJobNames = [] jobNameList.each { jobName, nodeDetails-> seenJobNames.add(jobName) if (urlJobName == jobName) runCI(nodeDetails, jobName) } // For url job names that are outside of the standardJobNameSet i.e. compute-rocm-dkms-no-npi-1901 if(!seenJobNames.contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) runCI([ubuntu16:['gfx906']], urlJobName) } } rocthrust-6.4.4/.jenkins/staticanalysis.groovy000066400000000000000000000023441507673026400216200ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCompileCommand(platform, project, jobName, boolean debug=false) { project.paths.construct_build_prefix() } def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocThrust', 'StaticAnalysis') // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false boolean staticAnalysis = true def compileCommand = { platform, project-> runCompileCommand(platform, project, jobName, false) } buildProject(prj , formatCheck, nodes.dockerArray, compileCommand, null, null, staticAnalysis) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 6')])])) stage(urlJobName) { runCI([ubuntu20:['any']], urlJobName) } } rocthrust-6.4.4/.jenkins/staticlibrary.groovy000066400000000000000000000050131507673026400214350ustar00rootroot00000000000000#!/usr/bin/env groovy @Library('rocJenkins@pong') _ import com.amd.project.* import com.amd.docker.* import java.nio.file.Path; def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocThrust', 'Static Library PreCheckin') prj.defaults.ccache = true prj.timeout.compile = 420 prj.libraryDependencies = ["rocPRIM"] def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName, false, true) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = 
["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])], "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "rocm-docker":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']])] jobNameList = auxiliary.appendJobNameList(jobNameList) auxiliary.registerDependencyBranchParameter(["rocPRIM"]) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu16:['gfx906']], urlJobName) } } } rocthrust-6.4.4/.readthedocs.yaml000066400000000000000000000005721507673026400170270ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 sphinx: configuration: docs/conf.py formats: [htmlzip, pdf, epub] python: install: - requirements: docs/sphinx/requirements.txt build: os: ubuntu-22.04 tools: python: "mambaforge-22.9" conda: environment: docs/environment.yml rocthrust-6.4.4/CHANGELOG.md000066400000000000000000000312211507673026400154040ustar00rootroot00000000000000# Changelog for rocThrust Documentation for rocThrust available at [https://rocm.docs.amd.com/projects/rocThrust/en/latest/](https://rocm.docs.amd.com/projects/rocThrust/en/latest/). 
## rocThrust 3.3.0 for ROCm 6.4 ### Added * Added a section to install Threading Building Blocks (TBB) inside `cmake/Dependencies.cmake` if TBB is not already available. * Made Threading Building Blocks (TBB) an optional dependency with the new `BUILD_HIPSTDPAR_TEST_WITH_TBB` flag, default is `OFF`. When the flag is `OFF` and TBB is not already on the machine it will compile without TBB. Otherwise, it will compile with TBB. * Added extended tests to `rtest.py`. These tests are extra tests that did not fit the criteria of smoke and regression tests. These tests will take much longer to run relative to smoke and regression tests. Use `python rtest.py [--emulation|-e|--test|-t]=extended` to run these tests. * Added regression tests to `rtest.py`. These tests recreate scenarios that have caused hardware problems in past emulation environments. Use `python rtest.py [--emulation|-e|--test|-t]=regression` to run these tests. * Added smoke test options, which run a subset of the unit tests and ensure that less than 2 GB of VRAM will be used. Use `python rtest.py [--emulation|-e|--test|-t]=smoke` to run these tests. * Added `--emulation` option for `rtest.py` * Merged changes from upstream CCCL/thrust 2.4.0 * Merged changes from upstream CCCL/thrust 2.5.0 * Added `find_first_of` to HIPSTDPAR * Added `search` and `find_end` to HIPSTDPAR * Added `search_n` to HIPSTDPAR * Updated HIPSTDPAR's `adjacent_find` to use rocPRIM's implementation ### Changed * Changed the C++ version from 14 to 17. C++14 will be deprecated in the next major release. * `--test|-t` is no longer a required flag for `rtest.py`. Instead, the user can use either `--emulation|-e` or `--test|-t`, but not both. * Split the contents of HIPSTDPAR's forwarding header into several implementation headers. * Fixed `copy_if` to work with large data types (512 bytes) * Updated the required version of Google Benchmark from 1.8.0 to 1.9.0. 
### Known Issues * `thrust::inclusive_scan_by_key` might produce incorrect results when it's used with -O2 or -O3 optimization. - The error is caused by a recent compiler change. There is a fix available that will be released at a later date. ## rocThrust 3.2.0 for ROCm 6.3 ### Added * Merged changes from upstream CCCL/thrust 2.3.2 * Only the NVIDIA backend uses `tuple` and `pair` types from libcu++, other backends continue to use the original Thrust implementations and hence do not require libcu++ (CCCL) as a dependency. * Added the `thrust::hip::par_det` execution policy to enable bitwise reproducibility on algorithms that are not bitwise reproducible by default. ### Changed * Updated the default value for the `-a` argument from `rmake.py` to `gfx906:xnack-,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201`. * Enabled the upstream (thrust) test suite for execution by default. It can still be disabled by CMake option `-DENABLE_UPSTREAM_TESTS=OFF`. ### Resolved issues * Fixed an issue in `rmake.py` where the list storing cmake options would contain individual characters instead of a full string of options. * Fixed the HIP backend not passing `TestCopyIfNonTrivial` from the upstream (thrust) test suite. * Fixed tests failing when compiled with `-D_GLIBCXX_ASSERTIONS=ON`. ## rocThrust 3.1.0 for ROCm 6.2 ### Additions * Merged changes from upstream CCCL/thrust 2.2.0 * Updated the contents of `system/hip` and `test` with the upstream changes to `system/cuda` and `testing` * Added HIPSTDPAR library as part of rocThrust. ### Changes * Updated internal calls to `rocprim::detail::invoke_result` to use the public API `rocprim::invoke_result`. * Use `rocprim::device_adjacent_difference` for `adjacent_difference` API call. * Updated internal use of custom iterator in `thrust::detail::unique_by_key` to use rocPRIM's `rocprim::unique_by_key`. 
* Updated `adjacent_difference` to make use of `rocprim::adjacent_difference` when iterators are comparable and not equal; otherwise, use `rocprim::adjacent_difference_inplace`. ### Fixes * Fixed incorrect implementation of `thrust::optional::emplace()`. ### Known issues * `thrust::reduce_by_key` outputs are not bit-wise reproducible, as run-to-run results for pseudo-associative reduction operators (e.g. floating-point arithmetic operators) are not deterministic on the same device. * Note that currently, rocThrust memory allocation is performed in such a way that most algorithmic API functions cannot be called from within hipGraphs. ## rocThrust 3.0.0 for ROCm 6.0 ### Additions * Updated to match upstream Thrust 2.0.1 * NV_IF_TARGET macro from libcu++ for NVIDIA backend and HIP implementation for HIP backend. ### Changes * The cmake build system now accepts `GPU_TARGETS` in addition to `AMDGPU_TARGETS` for setting the targeted gpu architectures. `GPU_TARGETS=all` will compile for all supported architectures. `AMDGPU_TARGETS` is only provided for backwards compatibility; `GPU_TARGETS` should be preferred. * Removed cub symlink from the root of the repository. * Removed support for deprecated macros (THRUST_DEVICE_BACKEND and THRUST_HOST_BACKEND). ### Fixes * Fixed a segmentation fault when binary search / upper bound / lower bound / equal range was invoked with `hip_rocprim::execute_on_stream_base` policy. ### Known issues * The `THRUST_HAS_CUDART` macro, which is no longer used in Thrust (it's provided only for legacy support) is replaced with `NV_IF_TARGET` and `THRUST_RDC_ENABLED` in the NVIDIA backend. The HIP backend doesn't have a `THRUST_RDC_ENABLED` macro, so some branches in Thrust code may be unreachable in the HIP backend. ## rocThrust 2.18.0 for ROCm 5.7 ### Fixes * Fixed `lower_bound`, `upper_bound`, and `binary_search` failing to compile for certain types. * Fixed issue where `transform_iterator` would not compile with `__device__`-only operators. 
### Changes * Updated `docs` directory structure to match the standard of [rocm-docs-core](https://github.com/RadeonOpenCompute/rocm-docs-core). * Removed references to and workarounds for deprecated hcc ## rocThrust 2.17.0 for ROCm 5.5 ### Additions * Updates to match upstream Thrust 1.17.2 ### Changes * `partition_copy` now uses `rocprim::partition_two_way` for increased performance ### Fixes * `set_difference` and `set_intersection` no longer hang if the number of items is above `UINT_MAX` (the unit tests for `set_difference` and `set_intersection` used to fail the `TestSetDifferenceWithBigIndexes`) ## rocThrust 2.16.0 for ROCm 5.3 ### Additions * Updates to match upstream Thrust 1.16.0 ### Changes * rocThrust functionality dependent on device malloc is functional (ROCm 5.2 reenabled device malloc); you can now use device launched `thrust::sort` and `thrust::sort_by_key` ## rocThrust 2.15.0 for ROCm 5.2 ### Additions * Packages for tests and benchmark executables on all supported operating systems using CPack ### Known issues * `async_copy`, `partition`, and `stable_sort_by_key` unit tests are failing for HIP on Windows ## rocThrust 2.14.0 for ROCm 5.1 ### Additions * Updates to match upstream Thrust 1.15.0 ### Known issues * `async_copy`, `partition`, and `stable_sort_by_key` unit tests are failing for HIP on Windows ## rocThrust 2.13.0 for ROCm 5.0 ### Changes * Updates to match upstream Thrust 1.13.0 * Updates to match upstream Thrust 1.14.0 * Added async scan * Scan algorithms: `inclusive_scan` now uses the `input-type` as `accumulator-type`; `exclusive_scan` uses `initial-value-type` * This changes the behavior of small-size input types with large-size output types (e.g. `short` input, `int` output) and low-res input with high-res output (e.g. 
`float` input, `double` output) ## rocThrust-2.11.2 for ROCm 4.5.0 ### Additions * Initial HIP on Windows support ### Changes * Packaging has changed to a development package (called `rocthrust-dev` for `.deb` packages and `rocthrust-devel` for `.rpm` packages). Because rocThrust is a header-only library, there is no runtime package. To aid in the transition, the development package sets the `provides` field to `rocthrust`, so that existing packages that are dependent on rocThrust can continue to work. This `provides` feature is introduced as a deprecated feature because it will be removed in a future ROCm release. ### Known issues * `async_copy`, `partition`, and `stable_sort_by_key` unit tests are failing for HIP on Windows * Mixed-type exclusive scan algorithm is not using the initial value type for the results type ## [rocThrust-2.11.1 for ROCm 4.4.0] ### Additions * gfx1030 support * AddressSanitizer build option ### Fixes * async_transform unit test failure ## [rocThrust-2.11.0 for ROCm 4.3.0] ### Additions * Updates to match upstream Thrust 1.11 * gfx90a support * gfx803 support re-enabled ## [rocThrust-2.10.9 for ROCm 4.2.0] ### Additions * Updates to match upstream Thrust 1.10 ### Changes * rocThrust now requires CMake version 3.10.2 or greater ### Fixes * Size zero inputs are now properly handled with newer ROCm builds, which no longer allow zero-size kernel grid/block dimensions * Warning of unused results ## [rocThrust-2.10.8 for ROCm 4.1.0] * There are no changes with this release ## [rocThrust-2.10.7 for ROCm 4.0.0] ### Additions * Updated to upstream Thrust 1.10.0 * Implemented runtime error for unsupported algorithms and disabled respective tests * Updated CMake to use downloaded rocPRIM ## [rocThrust-2.10.6 for ROCm 3.10] ### Additions * `copy_if` on device test case ### Known issues * We've disabled ROCm support for device malloc. 
As a result, rocThrust functionality dependent on device malloc does not work--avoid using device launched `thrust::sort` and `thrust::sort_by_key`. Note that Host launched functionality is not impacted. * A partial enablement of device malloc is possible by setting `HIP_ENABLE_DEVICE_MALLOC` to 1. * `thrust::sort` and `thrust::sort_by_key` may work on certain input sizes but we don't recommend this for production code. ## [rocThrust-2.10.5 for ROCm 3.9.0] ### Additions * Updated to upstream Thrust 1.9.8 * New test cases for device-side algorithms ### Fixes * Bug for binary search * Implemented workarounds for `hipStreamDefault` hang ### Known issues * We've disabled ROCm support for device malloc. As a result, rocThrust functionality dependent on device malloc does not work--avoid using device launched `thrust::sort` and `thrust::sort_by_key`. Note that Host launched functionality is not impacted. * A partial enablement of device malloc is possible by setting `HIP_ENABLE_DEVICE_MALLOC` to 1. * `thrust::sort` and `thrust::sort_by_key` may work on certain input sizes but we don't recommend this for production code. ## [rocThrust-2.10.4 for ROCm 3.8.0] ### Known issues * We've disabled ROCm support for device malloc. As a result, rocThrust functionality dependent on device malloc does not work--avoid using device launched `thrust::sort` and `thrust::sort_by_key`. Note that Host launched functionality is not impacted. * A partial enablement of device malloc is possible by setting `HIP_ENABLE_DEVICE_MALLOC` to 1. * `thrust::sort` and `thrust::sort_by_key` may work on certain input sizes but we don't recommend this for production code. ## [rocThrust-2.10.3 for ROCm 3.7.0] ### Additions * Updated to upstream Thrust 1.9.4 ### Changes * Package dependency has changed to rocPRIM only ### Known issues * We've disabled ROCm support for device malloc. 
As a result, rocThrust functionality dependent on device malloc does not work--avoid using device launched `thrust::sort` and `thrust::sort_by_key`. Note that Host launched functionality is not impacted. * A partial enablement of device malloc is possible by setting `HIP_ENABLE_DEVICE_MALLOC` to 1. * `thrust::sort` and `thrust::sort_by_key` may work on certain input sizes but we don't recommend this for production code. ## [rocThrust-2.10.2 for ROCm 3.6.0] ### Known issues * We've disabled ROCm support for device malloc. As a result, rocThrust functionality dependent on device malloc does not work--avoid using device launched `thrust::sort` and `thrust::sort_by_key`. Note that Host launched functionality is not impacted. * A partial enablement of device malloc is possible by setting `HIP_ENABLE_DEVICE_MALLOC` to 1. * `thrust::sort` and `thrust::sort_by_key` may work on certain input sizes but we don't recommend this for production code. ## [rocThrust-2.10.1 for ROCm 3.5.0] ### Additions * Improved tests with fixed and random seeds for test data ### Changes * CMake searches for rocThrust locally first; if it isn't found, CMake downloads it from GitHub ### Deprecations * HCC build has been deprecated rocthrust-6.4.4/CMakeLists.txt000066400000000000000000000170651507673026400163450ustar00rootroot00000000000000# ######################################################################## # Copyright 2019-2024 Advanced Micro Devices, Inc. 
# ######################################################################## cmake_minimum_required(VERSION 3.10.2 FATAL_ERROR) # Install prefix if(WIN32) set(CMAKE_INSTALL_PREFIX ${PROJECT_BINARY_DIR}/package CACHE PATH "Install path prefix, prepended onto install directories") else() set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories") endif() # Thrust project # Note: C is required here for dependencies project(rocthrust LANGUAGES CXX C) # Set CXX flags if (NOT DEFINED CMAKE_CXX_STANDARD) set(CMAKE_CXX_STANDARD 17) endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) # Set HIP flags set(CMAKE_HIP_STANDARD 17) set(CMAKE_HIP_STANDARD_REQUIRED ON) set(CMAKE_HIP_EXTENSIONS OFF) include(CheckLanguage) include(CMakeDependentOption) # Build options # Disable -Werror option(DISABLE_WERROR "Disable building with Werror" ON) option(BUILD_TEST "Build tests" OFF) option(BUILD_HIPSTDPAR_TEST "Build hipstdpar tests" OFF) option(BUILD_HIPSTDPAR_TEST_WITH_TBB "Build hipstdpar tests with TBB" OFF) option(BUILD_EXAMPLES "Build examples" OFF) option(BUILD_BENCHMARKS "Build benchmarks" OFF) option(DOWNLOAD_ROCPRIM "Download rocPRIM and do not search for rocPRIM package" OFF) option(DOWNLOAD_ROCRAND "Download rocRAND and do not search for rocRAND package" OFF) option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF) cmake_dependent_option(ENABLE_UPSTREAM_TESTS "Enable upstream (thrust) tests" ON BUILD_TEST OFF) #Set the header wrapper OFF by default. 
option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" OFF) check_language(HIP) cmake_dependent_option(USE_HIPCXX "Use CMake HIP language support" OFF CMAKE_HIP_COMPILER OFF) #Adding CMAKE_PREFIX_PATH list( APPEND CMAKE_PREFIX_PATH /opt/rocm/llvm /opt/rocm ${ROCM_PATH} ) # CMake modules list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${ROCM_PATH}/lib/cmake/hip /opt/rocm/lib/cmake/hip # FindHIP.cmake ${HIP_PATH}/cmake /opt/rocm/hip/cmake # FindHIP.cmake ) # Set a default build type if none was specified if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "Setting build type to 'Release' as none was specified.") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "" "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath") # rocm-cmake contains common cmake code for rocm projects to help # setup and install include( cmake/FindROCMCmake.cmake ) include( ROCMSetupVersion ) include( ROCMCreatePackage ) include( ROCMInstallTargets ) include( ROCMPackageConfigHelpers ) include( ROCMInstallSymlinks ) include( ROCMHeaderWrapper ) include( ROCMCheckTargetIds ) include( ROCMClients ) if(USE_HIPCXX) enable_language(HIP) else() # Use target ID syntax if supported for GPU_TARGETS if (NOT DEFINED AMDGPU_TARGETS) set(GPU_TARGETS "all" CACHE STRING "GPU architectures to compile for") else() set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for") endif() set_property(CACHE GPU_TARGETS PROPERTY STRINGS "all") if(GPU_TARGETS STREQUAL "all") if(BUILD_ADDRESS_SANITIZER) # ASAN builds require xnack rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+" ) else() rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS TARGETS 
"gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201" ) endif() set(GPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for" FORCE) endif() endif() # Get dependencies include(cmake/Dependencies.cmake) # Verify that supported compilers are used if (NOT WIN32) include(cmake/VerifyCompiler.cmake) endif() set(RNG_SEED_COUNT 0 CACHE STRING "Number of true random sequences to test each input size for") set(PRNG_SEEDS 1 CACHE STRING "Seeds of pseudo random sequences to test each input size for") set(THRUST_HOST_SYSTEM_OPTIONS CPP OMP TBB) set(THRUST_HOST_SYSTEM CPP CACHE STRING "The device backend to target.") set_property( CACHE THRUST_HOST_SYSTEM PROPERTY STRINGS ${THRUST_HOST_SYSTEM_OPTIONS} ) if (NOT THRUST_HOST_SYSTEM IN_LIST THRUST_HOST_SYSTEM_OPTIONS) message( FATAL_ERROR "THRUST_HOST_SYSTEM must be one of ${THRUST_HOST_SYSTEM_OPTIONS}" ) endif () if(DISABLE_WERROR) add_compile_options(-Wall -Wextra) else() add_compile_options(-Wall -Wextra -Werror) endif() if (CMAKE_CXX_STANDARD EQUAL 14) message(WARNING "C++14 will be deprecated in the next major release") elseif(NOT CMAKE_CXX_STANDARD EQUAL 17) message(FATAL_ERROR "Only C++14 and C++17 are supported") endif() if (WIN32) add_compile_options(-xhip) add_compile_definitions(THRUST_IGNORE_DEPRECATED_CPP_DIALECT) endif() # Address Sanitizer if(BUILD_ADDRESS_SANITIZER) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan") add_link_options(-fuse-ld=lld) endif() # Setup VERSION rocm_setup_version(VERSION "3.3.0") # Print configuration summary include(cmake/Summary.cmake) print_configuration_summary() # Thrust (with HIP backend) add_subdirectory(thrust) if(BUILD_TEST OR BUILD_BENCHMARKS OR BUILD_HIPSTDPAR_TEST) rocm_package_setup_component(clients) endif() # Tests if(BUILD_TEST OR BUILD_HIPSTDPAR_TEST) 
rocm_package_setup_client_component(tests) if (ENABLE_UPSTREAM_TESTS) enable_testing() endif() # We still want the testing to be compiled to catch some errors #TODO: Get testing folder working with HIP on Windows if (NOT WIN32 AND BUILD_TEST) add_subdirectory(testing) endif() enable_testing() add_subdirectory(test) endif() # Examples if(BUILD_EXAMPLES) add_subdirectory(examples) endif() # Benchmarks if(BUILD_BENCHMARKS) add_subdirectory(benchmarks) add_subdirectory(internal/benchmark) endif() #Create header wrapper for backward compatibility if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY AND NOT WIN32) rocm_wrap_header_dir( ${CMAKE_SOURCE_DIR}/thrust PATTERNS "*.h" "*.inl" "*.cuh" "*.hpp" HEADER_LOCATION include/thrust GUARDS SYMLINK WRAPPER WRAPPER_LOCATIONS rocthrust/${CMAKE_INSTALL_INCLUDEDIR}/thrust OUTPUT_LOCATIONS rocthrust/wrapper/include/thrust ) endif( ) set(THRUST_OPTIONS_DEBUG ${THRUST_OPTIONS_WARNINGS}) set(THRUST_OPTIONS_RELEASE ${THRUST_OPTIONS_WARNINGS}) # Package set(CPACK_DEBIAN_ARCHIVE_TYPE "gnutar") rocm_package_add_deb_dependencies(DEPENDS "rocprim-dev >= 2.10.1") rocm_package_add_rpm_dependencies(DEPENDS "rocprim-devel >= 2.10.1") set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip-thrust, thrust") set(CPACK_RPM_PACKAGE_CONFLICTS "hip-thrust, thrust") set(CPACK_DEBIAN_DEVEL_PACKAGE_PROVIDES "hipstdpar") set(CPACK_RPM_DEVEL_PACKAGE_PROVIDES "hipstdpar") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") set(CPACK_RPM_PACKAGE_LICENSE "ASL 2.0") # if(NOT CPACK_PACKAGING_INSTALL_PREFIX) # set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") # endif() set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}" ) rocm_create_package( NAME rocthrust DESCRIPTION "Radeon Open Compute Thrust library" MAINTAINER "rocthrust-maintainer@amd.com" HEADER_ONLY ) rocthrust-6.4.4/CONTRIBUTING.md000066400000000000000000000121301507673026400160220ustar00rootroot00000000000000 # Contributing to rocThrust # We welcome 
contributions to rocThrust. Please follow these details to help ensure your contributions will be successfully accepted. ## Issue Discussion ## Please use the GitHub Issues tab to notify us of issues. * Use your best judgement for issue creation. If your issue is already listed, upvote the issue and comment or post to provide additional details, such as how you reproduced this issue. * If you're not sure if your issue is the same, err on the side of caution and file your issue. You can add a comment to include the issue number (and link) for the similar issue. If we evaluate your issue as being the same as the existing issue, we'll close the duplicate. * If your issue doesn't exist, use the issue template to file a new issue. * When filing an issue, be sure to provide as much information as possible, including script output so we can collect information about your configuration. This helps reduce the time required to reproduce your issue. * Check your issue regularly, as we may require additional information to successfully reproduce the issue. * You may also open an issue to ask questions to the maintainers about whether a proposed change meets the acceptance criteria, or to discuss an idea pertaining to the library. ## Acceptance Criteria ## rocThrust is a version of the Thrust parallel algorithms library that has been ported to [HIP](https://github.com/ROCm/HIP) and [ROCm](https://www.github.com/ROCm/ROCm). This allows the library to be used on AMD GPU devices. Code in rocThrust should perform only the work that's necessary to support the Thrust API - any algorithmic work should be passed off to the backend system. On AMD platforms, this backend is [rocPRIM](https://github.com/ROCm/rocPRIM). In order to prevent performance regressions, when a pull request is created, a number of automated checks are run. These checks: * test the change on various OS platforms (Ubuntu, RHEL, etc.) * run on different GPU architectures (MI-series, Radeon series cards, etc.) 
* run benchmarks to check for performance degradation In order for a change to be accepted: * it must pass all of the automated checks * it must undergo a code review The GitHub "Issues" tab may also be used to discuss ideas surrounding particular features or changes before raising pull requests. ## Code Structure ## Thrust library code is located in the /thrust/ directory. The majority of the code required for porting the library to HIP is located in /thrust/system/hip/. HIP tests are located in the /test/ directory, while the original Thrust CUDA tests can be found in /testing/. ## Coding Style ## C and C++ code should be formatted using `clang-format`. Use the clang-format version for Clang 9, which is available in the `/opt/rocm` directory. Please do not use your system's built-in `clang-format`, as this is an older version that will have different results. To format a file, use: ``` /opt/rocm/hcc/bin/clang-format -style=file -i path/to/source-file ``` To format all files, run the following script in the rocThrust directory: ``` #!/bin/bash git ls-files -z *.cc *.cpp *.h *.hpp *.cl *.h.in *.hpp.in *.cpp.in | xargs -0 /opt/rocm/hcc/bin/clang-format -style=file -i ``` Also, githooks can be installed to format the code per-commit: ``` ./.githooks/install ``` ## Pull Request Guidelines ## Our code contribution guidelines closely follow the model of [GitHub pull-requests](https://help.github.com/articles/using-pull-requests/). When you create a pull request, you should target the default branch. Our current default branch is the **develop** branch, which serves as our integration branch. Releases are cut to release/rocm-rel-x.y, where x and y refer to the release major and minor numbers. ### Deliverables ### New changes should include test coverage. HIP tests are located in the /test/ directory, while the original Thrust CUDA tests can be found in /testing/. ### Process ### After you create a PR, you can take a look at a diff of the changes you made using the PR's "Files" tab. 
PRs must pass through the checks and the code review described in the [Acceptance Criteria](#acceptance-criteria) section before they can be merged. Checks may take some time to complete. You can view their progress in the table near the bottom of the pull request page. You may also be able to use the links in the table to view logs associated with a check if it fails. During code reviews, another developer will take a look through your proposed change. If any modifications are requested (or further discussion about anything is needed), they may leave a comment. You can follow up and respond to the comment, and/or create comments of your own if you have questions or ideas. When a modification request has been completed, the conversation thread about it will be marked as resolved. To update the code in your PR (eg. in response to a code review discussion), you can simply push another commit to the branch used in your pull request.rocthrust-6.4.4/LICENSE000066400000000000000000000236761507673026400146170ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS rocthrust-6.4.4/NOTICES.txt000066400000000000000000000145241507673026400154470ustar00rootroot00000000000000Notices and licenses file _________________________ AMD copyrighted code (Apache 2.0) Copyright © 2019-2022 Advanced Micro Devices, Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. crascit-downloadproject v-u (MIT) # Distributed under the OSI-approved MIT License. See accompanying # file LICENSE or https://github.com/Crascit/DownloadProject for details. Dependencies on scipy-scipy v-u (MIT) Copyright (C) 2003-2013 SciPy Developers. Modifications Copyright (C) 2019 Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of Enthought nor the names of the SciPy Developers may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Dependencies on thrust-thrust v1.9.2 (Apache 2.0) Copyright 2008-2013 NVIDIA Corporation Modifications Copyright (C) 2019 Advanced Micro Devices, Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. rocmsoftwareplatform-rocfft v-u (MIT) Copyright © 2016 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE thrust-thrust v1.9.2 (Apache 2.0) Copyright 2008-2013 NVIDIA Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. thrust-thrust v1.9.2 (BSD3) Copyright (c) 2011, Duane Merrill. All rights reserved. Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. rocthrust-6.4.4/README.md000066400000000000000000000274371507673026400150700ustar00rootroot00000000000000# rocThrust > [!NOTE] > The published documentation is available at [rocThrust](https://rocm.docs.amd.com/projects/rocThrust/en/latest/) in an organized, easy-to-read format, with search and a table of contents. The documentation source files reside in the `docs` folder of this repository. As with all ROCm projects, the documentation is open source. For more information on contributing to the documentation, see [Contribute to ROCm documentation](https://rocm.docs.amd.com/en/latest/contribute/contributing.html). Thrust is a parallel algorithm library. It has been ported to [HIP](https://github.com/ROCm/HIP) and [ROCm](https://www.github.com/ROCm/ROCm), which use the [rocPRIM](https://github.com/ROCm/rocPRIM) library. The HIP-ported library works on HIP and ROCm software. Currently there is no CUDA backend in place. 
## Requirements Software requirements include: * CMake (3.10.2 or later) * AMD [ROCm](https://rocm.docs.amd.com) Software (1.8.0 or later) * Including the [HipCC](https://github.com/ROCm/HIP) compiler, which must be set as your C++ compiler for ROCm * [rocPRIM](https://github.com/ROCm/rocPRIM) library * This is automatically downloaded and built by the CMake script * Python 3.6 or higher (for HIP on Windows; only required for install scripts) * Visual Studio 2019 with Clang support (for HIP on Windows) * Strawberry Perl (for HIP on Windows) Optional: * [GoogleTest](https://github.com/google/googletest) * Required only for tests; building tests is disabled by default (enable it with `-DBUILD_TEST=ON`) * This is automatically downloaded and built by the CMake script * [doxygen](https://www.doxygen.nl/) * Required for building the documentation For ROCm hardware requirements, refer to: * [Linux support](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) * [Windows support](https://rocm.docs.amd.com/projects/install-on-windows/en/latest/reference/system-requirements.html) ## Documentation Documentation for rocThrust is available at [https://rocm.docs.amd.com/projects/rocThrust/en/latest/](https://rocm.docs.amd.com/projects/rocThrust/en/latest/). You can build our documentation locally using the following commands: ```shell # Go to rocThrust docs directory cd rocThrust; cd docs # Install Python dependencies python3 -m pip install -r sphinx/requirements.txt # Build the documentation python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html # For example, serve the HTML docs locally cd _build/html python3 -m http.server ``` ## Build and install ```sh git clone https://github.com/ROCm/rocThrust # Go to rocThrust directory, create and go to the build directory. cd rocThrust; mkdir build; cd build # Configure rocThrust, setup options for your system. 
# Build options: # DISABLE_WERROR - ON by default, This flag disables the -Werror compiler flag # BUILD_TEST - OFF by default, # BUILD_HIPSTDPAR_TEST - OFF by default, # BUILD_EXAMPLES - OFF by default, # BUILD_BENCHMARKS - OFF by default, # DOWNLOAD_ROCPRIM - OFF by default, when ON rocPRIM will be downloaded to the build folder, # RNG_SEED_COUNT - 0 by default, controls non-repeatable random dataset count # PRNG_SEEDS - 1 by default, reproducible seeds to generate random data # # ! IMPORTANT ! # On ROCm platform set C++ compiler to HipCC. You can do it by adding 'CXX=<path-to-hipcc>' # before 'cmake' or setting cmake option 'CMAKE_CXX_COMPILER' with the path to the HipCC compiler. # [CXX=hipcc] cmake ../. # or cmake-gui ../. # Build make -j4 # Optionally, run tests if they're enabled. ctest --output-on-failure # Package make package # Install [sudo] make install ``` ### HIP on Windows We've added initial support for HIP on Windows. To install, use the provided `rmake.py` Python script: ```shell git clone https://github.com/ROCm/rocThrust.git cd rocThrust # the -i option will install rocPRIM to C:\hipSDK by default python rmake.py -i # the -c option will build all clients including unit tests python rmake.py -c ``` ### Macro options ```cpp # Performance improvement option. If you define THRUST_HIP_PRINTF_ENABLED to 0 before # including the thrust headers, you can disable printfs on device side and improve # performance. The default value is 1 #define THRUST_HIP_PRINTF_ENABLED 0 ``` ### Using rocThrust in a project We recommend including rocThrust into a CMake project by using its package configuration files. ```cmake # On ROCm rocThrust requires rocPRIM find_package(rocprim REQUIRED CONFIG PATHS "/opt/rocm/rocprim") # "/opt/rocm" - default install prefix find_package(rocthrust REQUIRED CONFIG PATHS "/opt/rocm/rocthrust") ... 
includes rocThrust headers and roc::rocprim_hip target target_link_libraries(<your_target> roc::rocthrust) ``` ## Running unit tests ```sh # Go to rocThrust build directory cd rocThrust; cd build # Configure with tests flag on CXX=hipcc cmake -DBUILD_TEST=ON .. # Build tests make -j4 # To run all tests ctest # To run unit tests for rocThrust ./test/<unit-test-name> ``` ### Using multiple GPUs concurrently for testing This feature requires CMake 3.16+ to be used for building and testing. *(Prior versions of CMake can't assign IDs to tests when running in parallel. Assigning tests to distinct devices could only be done at the cost of extreme complexity.)* Unit tests can make use of the [CTest Resource Allocation](https://cmake.org/cmake/help/latest/manual/ctest.1.html#resource-allocation) feature, which enables distributing tests across multiple GPUs in an intelligent manner. This feature can accelerate testing when multiple GPUs of the same family are in a system. It can also test multiple product families from one invocation without having to use the `HIP_VISIBLE_DEVICES` environment variable. CTest Resource Allocation requires a resource spec file. ```important Using `RESOURCE_GROUPS` and `--resource-spec-file` with CMake and CTest, respectively for versions prior to 3.16 omits the feature silently. Therefore, you must ensure that the `cmake` and `ctest` you invoke are sufficiently recent. 
``` #### Auto resource spec generation There is a utility script in the repo that may be called independently: ```shell # Go to rocThrust build directory cd rocThrust; cd build # Invoke directly or use CMake script mode via cmake -P ../cmake/GenerateResourceSpec.cmake # Assuming you have 2 compatible GPUs in the system ctest --resource-spec-file ./resources.json --parallel 2 ``` #### Manual Assuming you have two GPUs from the gfx900 family and they are the first devices enumerated by the system, you can specify `-D AMDGPU_TEST_TARGETS=gfx900` during configuration to specify that you want only one family to be tested. If you leave this var empty (default), the default device in the system is targeted. To specify that there are two GPUs that should be targeted, you must feed a JSON file to CTest using the `--resource-spec-file ` flag. For example: ```json { "version": { "major": 1, "minor": 0 }, "local": [ { "gfx900": [ { "id": "0" }, { "id": "1" } ] } ] } ``` ## Using custom seeds for the tests There are two CMake configuration-time options that control random data fed to unit tests. * `RNG_SEED_COUNT`: 0 by default, controls non-repeatable random dataset count. * Draws values from a default constructed `std::random_device`. * Should tests fail, the actual seed producing the failure is reported by Googletest, which allows for reproducibility. * `PRNG_SEEDS`: 1 by default, controls repeatable dataset seeds. * This is a CMake formatted (semicolon delimited) array of 32-bit unsigned integers. Note that semicolons often collide with shell command parsing. We advise escaping the entire CMake CLI argument to avoid having the variable pick up quotation marks. For example, pass `cmake "-DPRNG_SEEDS=1;2;3;4"` instead of `cmake -DPRNG_SEEDS="1;2;3;4"` (these cases differ in how the CMake executable receives arguments from the operating system). 
## Running examples ```sh # Go to rocThrust build directory cd rocThrust; cd build # Configure with examples flag on CXX=hipcc cmake -DBUILD_EXAMPLES=ON .. # Build examples make -j4 # Run the example you want to run # ./examples/example_thrust_<example-name> # For example: ./examples/example_thrust_version # Example for linking with cpp files ./examples/cpp_integration/example_thrust_linking ``` ## Running benchmarks ```sh # Go to rocThrust build directory cd rocThrust; cd build # Configure with benchmarks flag on CXX=hipcc cmake -DBUILD_BENCHMARKS=ON .. # Build benchmarks make -j4 # Run the benchmarks ./benchmarks/benchmark_thrust_bench ``` ## HIPSTDPAR rocThrust also hosts the header files for [HIPSTDPAR](https://rocm.blogs.amd.com/software-tools-optimization/hipstdpar/README.html#c-17-parallel-algorithms-and-hipstdpar). Within these headers, a great part of the C++ Standard Library parallel algorithms are overloaded so that rocThrust's and rocPRIM's implementations of those algorithms are used when they are invoked with the `parallel_unsequenced_policy` policy. When compiling with the proper flags (see [LLVM (AMD's fork) docs](https://github.com/ROCm/llvm-project/blob/rocm-6.2.x/clang/docs/HIPSupport.rst#implementation-driver)[^1] for the complete list), the HIPSTDPAR headers are implicitly included by the compiler, and therefore the execution of these parallel algorithms will be offloaded to AMD devices. [^1]: Although currently only AMD's fork of LLVM contains the docs for the [C++ Standard Parallelism Offload Support](https://github.com/ROCm/llvm-project/blob/rocm-6.2.x/clang/docs/HIPSupport.rst#c-standard-parallelism-offload-support-compiler-and-runtime), both of them (the upstream LLVM and AMD's fork) do support it. ### Install HIPSTDPAR is currently packaged along with rocThrust. The `hipstdpar` package is set up as a virtual package provided by `rocthrust`, so the latter needs to be installed entirely to get HIPSTDPAR's headers. 
Conversely, installing the `rocthrust` package will also include HIPSTDPAR's headers in the system. ### Tests rocThrust also includes tests to check the correct building of HIPSTDPAR implementations. They are located in the [test/hipstdpar](/test/hipstdpar/) folder. When configuring the project with the `BUILD_TEST` option, these tests will not be enabled by default. To enable them, set `BUILD_HIPSTDPAR_TEST=ON`. Additionally, you can configure only HIPSTDPAR's tests by disabling `BUILD_TEST` and enabling `BUILD_HIPSTDPAR_TEST`. In general, the following steps can be followed for building and running the tests: ```sh git clone https://github.com/ROCm/rocThrust # Go to rocThrust directory, create and go to the build directory. cd rocThrust; mkdir build; cd build # Configure rocThrust. [CXX=hipcc] cmake ../. -D BUILD_TEST=ON # Configure rocThrust's tests. [CXX=hipcc] cmake ../. -D BUILD_TEST=ON -D BUILD_HIPSTDPAR_TEST=ON # Configure both rocThrust's tests and HIPSTDPAR's tests. [CXX=hipcc] cmake ../. -D BUILD_TEST=OFF -D BUILD_HIPSTDPAR_TEST=ON # Only configure HIPSTDPAR's tests. # Build make -j4 # Run tests. ctest --output-on-failure ``` #### Requirements * [rocPRIM](https://github.com/ROCm/rocPRIM) and [rocThrust](https://github.com/ROCm/rocThrust) libraries * [TBB](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onetbb.html) library * Notice that oneTBB (oneAPI TBB) may fail to compile when libstdc++-9 or -10 is used, due to them using legacy TBB interfaces that are incompatible with the oneTBB ones (see the [release notes](https://www.intel.com/content/www/us/en/developer/articles/release-notes/intel-oneapi-threading-building-blocks-release-notes.html)). * CMake (3.10.2 or later) ## Support You can report bugs and feature requests through the GitHub [issue tracker](https://github.com/ROCm/rocThrust/issues). ## License rocThrust is distributed under the [Apache 2.0 LICENSE](./LICENSE). 
rocthrust-6.4.4/benchmarks/000077500000000000000000000000001507673026400157115ustar00rootroot00000000000000rocthrust-6.4.4/benchmarks/CMakeLists.txt000066400000000000000000000052241507673026400204540ustar00rootroot00000000000000# ######################################################################## # Copyright 2024 Advanced Micro Devices, Inc. # ######################################################################## include(Benchmarks) # Benchmarks directory in project's root set(BENCHMARKS_ROOT "${CMAKE_CURRENT_LIST_DIR}") # Subdirectory of BENCHMARKS_ROOT containing all the .cu files grouped in # subdirectories accordingly to the functionality benchmarked set(BENCHMARKS_DIR "bench") # **************************************************************************** # Functions # **************************************************************************** # Gets all subdirs of benchmarks recursively function(get_recursive_subdirs subdirs dir_prefix dir_name) set(dirs) file(GLOB_RECURSE contents CONFIGURE_DEPENDS LIST_DIRECTORIES ON "${dir_prefix}/${dir_name}/*" ) foreach(dir IN LISTS contents) if(IS_DIRECTORY "${dir}") list(APPEND dirs "${dir}") endif() endforeach() set(${subdirs} "${dirs}" PARENT_SCOPE) endfunction() function(add_bench_dir bench_dir) # Get algorithm name get_filename_component(algo_name "${bench_dir}" NAME_WLE) # For scan, we also append the parent diretory name, as the algo_name # will be exclusive/inclusive get_filename_component(PARENT_DIR "${bench_dir}" DIRECTORY) get_filename_component(PARENT_DIR_NAME "${PARENT_DIR}" NAME_WLE) if(PARENT_DIR_NAME STREQUAL "scan") set(algo_name "${PARENT_DIR_NAME}_${algo_name}") endif() # Take all .cu (tests) inside the dir file(GLOB bench_srcs CONFIGURE_DEPENDS "${bench_dir}/*.cu") # Set benchmark prefix as its relative path to benchmarks separated # by . instead of / file(RELATIVE_PATH bench_prefix "${BENCHMARKS_ROOT}" "${bench_dir}") file(TO_CMAKE_PATH "${bench_prefix}" bench_prefix) string(REPLACE "/" "." 
bench_prefix "${bench_prefix}") # Add each benchmark as thrust benchmark foreach(bench_src IN LISTS bench_srcs) set(real_bench_src "${bench_src}") # Get file name without directory nor last extension get_filename_component(bench_name "${bench_src}" NAME_WLE) add_thrust_benchmark("${algo_name}_${bench_name}" ${bench_src} ON) endforeach() endfunction() # **************************************************************************** # Benchmarks # **************************************************************************** message (STATUS "Configuring benchmarks") # Get all the subdirectories inside the bench directory get_recursive_subdirs(subdirs ${BENCHMARKS_ROOT} ${BENCHMARKS_DIR}) # Add benchmarks from each subdirectory present in bench foreach(subdir IN LISTS subdirs) add_bench_dir("${subdir}") endforeach() rocthrust-6.4.4/benchmarks/bench/000077500000000000000000000000001507673026400167705ustar00rootroot00000000000000rocthrust-6.4.4/benchmarks/bench/adjacent_difference/000077500000000000000000000000001507673026400227135ustar00rootroot00000000000000rocthrust-6.4.4/benchmarks/bench/adjacent_difference/basic.cu000066400000000000000000000133501507673026400243270ustar00rootroot00000000000000// MIT License // // Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. 
// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. // Benchmark utils #include "../../bench_utils/bench_utils.hpp" // rocThrust #include #include #include // Google Benchmark #include // STL #include #include #include struct basic { template float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); thrust::adjacent_difference(policy, input.cbegin(), input.cend(), output.begin()); d_timer.stop(0); return d_timer.get_duration(); } }; template void run_benchmark(benchmark::State& state, const std::size_t elements, const std::string seed_type) { // Benchmark object Benchmark benchmark {}; // GPU times std::vector gpu_times; // Generate input thrust::device_vector input = bench_utils::generate(elements, seed_type); // Output thrust::device_vector output(elements); bench_utils::caching_allocator_t alloc {}; thrust::detail::device_t policy {}; for(auto _ : state) { float64_t duration = benchmark.template run(input, output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } // BytesProcessed include read and written bytes, so when the BytesProcessed/s are reported // it will actually be the global memory bandwidth gotten. 
state.SetBytesProcessed(state.iterations() * 2 * elements * sizeof(T)); state.SetItemsProcessed(state.iterations() * elements); const double gpu_cv = bench_utils::StatisticsCV(gpu_times); state.counters["gpu_noise"] = gpu_cv; } #define CREATE_BENCHMARK(T, Elements) \ benchmark::RegisterBenchmark( \ bench_utils::bench_naming::format_name("{algo:adjacent_difference,subalgo:" + name \ + ",input_type:" #T + ",elements:" #Elements) \ .c_str(), \ run_benchmark, \ Elements, \ seed_type) #define BENCHMARK_TYPE(type) \ CREATE_BENCHMARK(type, 1 << 16), CREATE_BENCHMARK(type, 1 << 20), \ CREATE_BENCHMARK(type, 1 << 24), CREATE_BENCHMARK(type, 1 << 28) template void add_benchmarks(const std::string& name, std::vector& benchmarks, const std::string seed_type) { std::vector bs = {BENCHMARK_TYPE(int8_t), BENCHMARK_TYPE(int16_t), BENCHMARK_TYPE(int32_t), BENCHMARK_TYPE(int64_t), BENCHMARK_TYPE(float32_t), BENCHMARK_TYPE(float64_t)}; benchmarks.insert(benchmarks.end(), bs.begin(), bs.end()); } int main(int argc, char* argv[]) { cli::Parser parser(argc, argv); parser.set_optional( "name_format", "name_format", "human", "either: json,human,txt"); parser.set_optional("seed", "seed", "random", bench_utils::get_seed_message()); parser.run_and_exit_if_error(); // Parse argv benchmark::Initialize(&argc, argv); bench_utils::bench_naming::set_format( parser.get("name_format")); /* either: json,human,txt */ const std::string seed_type = parser.get("seed"); // Benchmark info bench_utils::add_common_benchmark_info(); benchmark::AddCustomContext("seed", seed_type); // Add benchmark std::vector benchmarks; add_benchmarks("basic", benchmarks, seed_type); // Use manual timing for(auto& b : benchmarks) { b->UseManualTime(); b->Unit(benchmark::kMicrosecond); b->MinTime(0.4); // in seconds } // Run benchmarks benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); return 0; } 
rocthrust-6.4.4/benchmarks/bench/adjacent_difference/custom.cu000066400000000000000000000141311507673026400245560ustar00rootroot00000000000000// MIT License // // Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
// Benchmark utils
#include "../../bench_utils/bench_utils.hpp"

// NOTE(review): every #include below has lost its header name, and the template
// declarations further down have lost their parameter/argument lists. It looks like
// angle-bracket text was stripped from this copy of the file - TODO restore from
// upstream before building.

// rocThrust
#include
#include
#include

// Google Benchmark
#include

// STL
#include
#include
#include

// Binary functor handed to thrust::adjacent_difference in place of the default
// operation: for two operands it returns lhs * rhs + val.
// The default constructor is deleted, so `val` must always be supplied explicitly.
template
struct custom_op
{
  // Constant added to every product.
  T val;

  custom_op() = delete;

  explicit custom_op(T val)
      : val(val)
  {
  }

  // Device-side call operator; returns lhs * rhs + val.
  __device__ T operator()(const T& lhs, const T& rhs)
  {
    return lhs * rhs + val;
  }
};

// Benchmark body: times a single thrust::adjacent_difference call that uses
// custom_op as the binary operation.
// NOTE(review): `Val` is not declared in the visible text; presumably it is a
// template parameter of `custom` whose declaration was lost - TODO confirm.
template
struct custom
{
  // Runs adjacent_difference once over `input`, writing to `output`, and returns the
  // duration reported by the gpu_timer for that call.
  //
  // input  - source range (read through cbegin()/cend()).
  // output - destination; written starting at output.begin().
  // policy - execution policy passed straight through to thrust.
  template
  float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy)
  {
    bench_utils::gpu_timer d_timer;

    // Only the algorithm call itself sits between start() and stop().
    d_timer.start(0);
    thrust::adjacent_difference(
      policy, input.cbegin(), input.cend(), output.begin(), custom_op {Val});
    d_timer.stop(0);

    return d_timer.get_duration();
  }
};

// Google Benchmark driver: generates the input once, then invokes Benchmark::run on
// every benchmark iteration and reports the duration it returns as the manual
// iteration time.
//
// state     - Google Benchmark state object driving the iteration loop.
// elements  - number of elements in the input and output vectors.
// seed_type - forwarded to bench_utils::generate when creating the input.
template
void run_benchmark(benchmark::State& state, const std::size_t elements, const std::string seed_type)
{
  // Benchmark object
  Benchmark benchmark {};

  // GPU times, one entry per iteration
  std::vector gpu_times;

  // Generate input
  thrust::device_vector input = bench_utils::generate(elements, seed_type);

  // Output
  thrust::device_vector output(elements);

  // The allocator is bound to the device policy for each run() call below.
  bench_utils::caching_allocator_t alloc {};
  thrust::detail::device_t policy {};

  for(auto _ : state)
  {
    float64_t duration = benchmark.template run(input, output, policy(alloc));
    state.SetIterationTime(duration);
    gpu_times.push_back(duration);
  }

  // BytesProcessed includes both read and written bytes (hence the factor 2), so the
  // reported BytesProcessed/s is effectively the achieved global memory bandwidth.
  state.SetBytesProcessed(state.iterations() * 2 * elements * sizeof(T));
  state.SetItemsProcessed(state.iterations() * elements);

  // Publish the StatisticsCV value of the collected GPU times as "gpu_noise".
  // NOTE(review): presumably the coefficient of variation - confirm in bench_utils.
  const double gpu_cv = bench_utils::StatisticsCV(gpu_times);
  state.counters["gpu_noise"] = gpu_cv;
}

// Expands to one benchmark::RegisterBenchmark(...) call for element type T and an
// input size of Elements.
//   - The benchmark name is produced by bench_naming::format_name from `name`, the
//     stringified type (#T) and the stringified size expression (#Elements); a size
//     written as `1 << 16` therefore shows up in the name literally as "1 << 16".
//   - `name` and `seed_type` are not macro parameters: they must be in scope at the
//     point of expansion.
// NOTE(review): the name string opens with '{' but no closing '}' is appended here;
// confirm that format_name accepts this.
// NOTE(review): `run_benchmark, T>` is missing the start of its template argument
// list - TODO restore from upstream.
#define CREATE_BENCHMARK(T, Elements)                                                   \
  benchmark::RegisterBenchmark(                                                         \
    bench_utils::bench_naming::format_name("{algo:adjacent_difference,subalgo:" + name \
                                           + ",input_type:" #T + ",elements:" #Elements) \
      .c_str(),                                                                         \
    run_benchmark, T>,                                                                  \
    Elements,                                                                           \
    seed_type)

// Expands to four comma-separated CREATE_BENCHMARK invocations for `type`, with
// input sizes 1 << 16, 1 << 20, 1 << 24 and 1 << 28 elements.
#define BENCHMARK_TYPE(type)                                         \
  CREATE_BENCHMARK(type, 1 << 16), CREATE_BENCHMARK(type, 1 << 20), \
    CREATE_BENCHMARK(type, 1 << 24), CREATE_BENCHMARK(type, 1 << 28)

// NOTE(review): the declaration opened by the following `template` continues on the
// next line of the file.
template