pax_global_header00006660000000000000000000000064150047422310014510gustar00rootroot0000000000000052 comment=2656692311e9a839ce48817c284c8bc6d2fec3f2 hipBLAS-rocm-6.4.3/000077500000000000000000000000001500474223100137025ustar00rootroot00000000000000hipBLAS-rocm-6.4.3/.azuredevops/000077500000000000000000000000001500474223100163275ustar00rootroot00000000000000hipBLAS-rocm-6.4.3/.azuredevops/rocm-ci.yml000066400000000000000000000012411500474223100204010ustar00rootroot00000000000000resources: repositories: - repository: pipelines_repo type: github endpoint: ROCm name: ROCm/ROCm variables: - group: common - template: /.azuredevops/variables-global.yml@pipelines_repo trigger: batch: true branches: include: - develop - mainline paths: exclude: - .githooks - .github - .jenkins - docs - '.*.y*ml' - '*.md' pr: autoCancel: true branches: include: - develop - mainline paths: exclude: - .githooks - .github - .jenkins - docs - '.*.y*ml' - '*.md' drafts: false jobs: - template: ${{ variables.CI_COMPONENT_PATH }}/hipBLAS.yml@pipelines_repo hipBLAS-rocm-6.4.3/.clang-format000066400000000000000000000065421500474223100162640ustar00rootroot00000000000000# Style file for MLSE Libraries based on the modified rocBLAS style # Common settings BasedOnStyle: WebKit TabWidth: 4 IndentWidth: 4 UseTab: Never ColumnLimit: 100 # Other languages JavaScript, Proto --- Language: Cpp # http://releases.llvm.org/6.0.1/tools/clang/docs/ClangFormatStyleOptions.html#disabling-formatting-on-a-piece-of-code # int formatted_code; # // clang-format off # void unformatted_code ; # // clang-format on # void formatted_code_again; DisableFormat: false Standard: Cpp11 AccessModifierOffset: -4 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: false AllowAllArgumentsOnNextLine: true AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true 
AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true BinPackArguments: false BinPackParameters: false # Configure each individual brace in BraceWrapping BreakBeforeBraces: Custom # Control of individual brace wrapping cases BraceWrapping: { AfterCaseLabel: 'true' AfterClass: 'true' AfterControlStatement: 'true' AfterEnum : 'true' AfterFunction : 'true' AfterNamespace : 'true' AfterStruct : 'true' AfterUnion : 'true' BeforeCatch : 'true' BeforeElse : 'true' IndentBraces : 'false' # AfterExternBlock : 'true' } #BreakAfterJavaFieldAnnotations: true #BreakBeforeInheritanceComma: false #BreakBeforeBinaryOperators: None #BreakBeforeTernaryOperators: true #BreakConstructorInitializersBeforeComma: true #BreakStringLiterals: true CommentPragmas: '^ IWYU pragma:' #CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true SpaceBeforeCpp11BracedList: false DerivePointerAlignment: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IndentCaseLabels: false IndentPPDirectives: None #FixNamespaceComments: true IndentWrappedFunctionNames: true KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' #JavaScriptQuotes: Double MaxEmptyLinesToKeep: 1 NamespaceIndentation: All ObjCBlockIndentWidth: 4 #ObjCSpaceAfterProperty: true #ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left SpaceAfterCStyleCast: false 
SpaceBeforeAssignmentOperators: true SpaceBeforeParens: Never SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false #SpaceAfterTemplateKeyword: true #SpaceBeforeInheritanceColon: true #SortUsingDeclarations: true SortIncludes: true # Comments are for developers, they should arrange them ReflowComments: false #IncludeBlocks: Preserve --- hipBLAS-rocm-6.4.3/.githooks/000077500000000000000000000000001500474223100156075ustar00rootroot00000000000000hipBLAS-rocm-6.4.3/.githooks/install000077500000000000000000000002221500474223100171770ustar00rootroot00000000000000#!/usr/bin/env bash cd $(git rev-parse --git-dir) cd hooks echo "Installing hooks..." ln -s ../../.githooks/pre-commit pre-commit echo "Done!" hipBLAS-rocm-6.4.3/.githooks/pre-commit000077500000000000000000000046321500474223100176160ustar00rootroot00000000000000#!/bin/bash # # This pre-commit hook checks if any versions of clang-format # are installed, and if so, uses the installed version to format # the staged changes. export PATH=$PATH:/opt/rocm/llvm/bin:/usr/bin:/bin # Redirect stdout to stderr. 
exec >&2 # Do everything from top - level cd $(git rev-parse --show-toplevel) if git rev-parse --verify HEAD >/dev/null 2>&1; then against=HEAD else # Initial commit: diff against an empty tree object against=8c64666c40d9eff214389fbcc1648331fb17a029 fi if [[ "$1" == "--reformat" ]]; then files=$(git ls-files --exclude-standard) else files=$(git diff-index --cached --name-only $against) fi [[ -z "$files" ]] && exit # Change the copyright date at the top of any text files for file in $files; do [[ -L $file ]] && continue echo "Processing copyright dates in $file" if [[ -e $file ]]; then /usr/bin/perl -pi -e 'INIT { exit 1 if !-f $ARGV[0] || -B $ARGV[0]; $year = (localtime)[5] + 1900 } s/^([*\/#\/"*[:space:]]*)Copyright\s+(?:\(C\)\s*)?(\d+)(?:\s*-\s*\d+)?\s(Advanced\s*Micro\s*Devices)/qq($1Copyright (C) $2@{[$year != $2 ? "-$year" : ""]} $3)/ie if $. < 10' "$file" && git add -u "$file" fi done # do the formatting for file in $files; do [[ -L $file ]] && continue if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.in$|\.txt$|\.yaml$|\.yml$|\.sh$|\.py$|\.pl$|\.cmake$|\.md$|\.rst$|\.groovy$|\.ini$|\.awk$|\.csv$'; then echo "Processing line endings in $file" sed -i -e 's/[[:space:]]*$//' "$file" # Remove whitespace at end of lines sed -i -e '$a\' "$file" # Add missing newline to end of file echo "Converting non-ASCII characters to ASCII equivalents in $file" # Convert UTF8 non-ASCII to ASCII temp=$(mktemp) [[ -w $temp ]] || exit iconv -s -f utf-8 -t ascii//TRANSLIT "$file" > "$temp" || exit chmod --reference="$file" "$temp" || exit mv -f "$temp" "$file" || exit git add -u "$file" fi done # if clang-format exists, run it on C/C++ files if command -v clang-format >/dev/null; then for file in $files; do [[ -L $file ]] && continue if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.h\.in$|\.hpp\.in$|\.cpp\.in$'; then echo "clang-format $file" clang-format -i -style=file "$file" git add -u "$file" fi done fi 
hipBLAS-rocm-6.4.3/.github/000077500000000000000000000000001500474223100152425ustar00rootroot00000000000000hipBLAS-rocm-6.4.3/.github/CODEOWNERS000077500000000000000000000013001500474223100166320ustar00rootroot00000000000000* @amcamd @TorreZuk @mahmoodw @daineAMD @bragadeesh @NaveenElumalaiAMD @rkamd # Documentation files docs/* @ROCm/rocm-documentation @amcamd @TorreZuk @mahmoodw @daineAMD @bragadeesh @NaveenElumalaiAMD @rkamd *.md @ROCm/rocm-documentation @amcamd @TorreZuk @mahmoodw @daineAMD @bragadeesh @NaveenElumalaiAMD @rkamd *.rst @ROCm/rocm-documentation @amcamd @TorreZuk @mahmoodw @daineAMD @bragadeesh @NaveenElumalaiAMD @rkamd .readthedocs.yaml @ROCm/rocm-documentation @amcamd @TorreZuk @mahmoodw @daineAMD @bragadeesh @NaveenElumalaiAMD @rkamd # Header directory for Doxygen documentation library/include/* @ROCm/rocm-documentation @amcamd @TorreZuk @mahmoodw @daineAMD @bragadeesh @NaveenElumalaiAMD @rkamd hipBLAS-rocm-6.4.3/.github/CONTRIBUTING.rst000066400000000000000000000114541500474223100177100ustar00rootroot00000000000000======================= Contributing to hipBLAS ======================= We welcome contributions to hipBLAS. Please follow these details to help ensure your contributions will be successfully accepted. Issue Discussion ================ Please use the GitHub Issues tab to notify us of issues. - Use your best judgment for issue creation. If your issue is already listed, upvote the issue and comment or post to provide additional details, such as how you reproduced this issue. - If you're not sure if your issue is the same, err on the side of caution and file your issue. You can add a comment to include the issue number (and link) for the similar issue. If we evaluate your issue as being the same as the existing issue, we'll close the duplicate. - If your issue doesn't exist, use the issue template to file a new issue. 
- When filing an issue, be sure to provide as much information as possible, including script output so we can collect information about your configuration. This helps reduce the time required to reproduce your issue. - Check your issue regularly, as we may require additional information to successfully reproduce the issue. - You may also open an issue to ask questions to the maintainers about whether a proposed change meets the acceptance criteria, or to discuss an idea pertaining to the library. Acceptance Criteria =================== Contributors wanting to submit improvements, or bug fixes should follow the below mentioned guidelines. Pull requests will be reviewed by members of `CODEOWNERS `__ Continuous Integration tests will be run on the pull request. Once the pull request is approved and tests pass it will be merged by a member of `CODEOWNERS `__ Attribution for your commit will be preserved when it is merged. Pull Request Guidelines ======================= By creating a pull request, you agree to the statements made in the `Code License`_ section. Your pull request should target the default branch. Our current default branch is the develop branch, which serves as our integration branch. Pull requests should: - ensure code builds successfully. - do not break existing test cases. - new functionality will only be merged with new unit tests. - new unit tests should integrate within the existing googletest framework. - tests must have good code coverage. - code must also have benchmark tests, and performance must approach the compute bound limit or memory bound limit. Deliverables ============ For each new file, please include the licensing header .. code:: cpp /******************************************************************************* * Copyright (c) 20xx Advanced Micro Devices, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * *******************************************************************************/ Process ======= hipBLAS uses the ``clang-format`` tool for formatting code in source files. To format a file, use: :: clang-format -style=file -i To format all files, run the following script in hipBLAS directory: :: #!/bin/bash git ls-files -z *.cc *.cpp *.h *.hpp *.cl *.h.in *.hpp.in *.cpp.in | xargs -0 clang-format -style=file -i Also, githooks can be installed to format the code per-commit: :: ./.githooks/install Code License ============ All code contributed to this project will be licensed under the license identified in the `LICENSE.md `__. Your contribution will be accepted under the same license. 
References ========== `hipBLAS documentation `__ hipBLAS-rocm-6.4.3/.github/ISSUE_TEMPLATE.md000066400000000000000000000004531500474223100177510ustar00rootroot00000000000000### What is the expected behavior - ### What actually happens - ### How to reproduce - ### Environment | Hardware | description | |-----|-----| | GPU | device string | | CPU | device string | | Software | version | |-----|-----| | ROCK | v0.0 | | ROCR | v0.0 | | HCC | v0.0 | | Library | v0.0 | hipBLAS-rocm-6.4.3/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000000621500474223100210410ustar00rootroot00000000000000resolves #___ Summary of proposed changes: - - - hipBLAS-rocm-6.4.3/.github/dependabot.yml000066400000000000000000000011731500474223100200740ustar00rootroot00000000000000# To get started with Dependabot version updates, you'll need to specify which # package ecosystems to update and where the package manifests are located. # Please see the documentation for all configuration options: # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates version: 2 updates: - package-ecosystem: "pip" # See documentation for possible values directory: "/docs/sphinx" # Location of package manifests open-pull-requests-limit: 10 schedule: interval: "daily" labels: - "dependencies" - "ci:docs-only" reviewers: - "samjwu" hipBLAS-rocm-6.4.3/.gitignore000066400000000000000000000006711500474223100156760ustar00rootroot00000000000000# Compiled Object files *.slo *.lo *.o *.obj # Precompiled Headers *.gch *.pch # Compiled Dynamic libraries *.so *.dylib *.dll # Fortran module files *.mod *.smod # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app # Editors .vscode # build-in-source directory build* # emacs temporary/backup files .\#* \#*\# *~ # documentation artifacts build/ _build/ _images/ _static/ _templates/ _toc.yml _doxygen/ 
hipBLAS-rocm-6.4.3/.jenkins/000077500000000000000000000000001500474223100154215ustar00rootroot00000000000000hipBLAS-rocm-6.4.3/.jenkins/common.groovy000066400000000000000000000102301500474223100201540ustar00rootroot00000000000000// This file is for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. def runCompileCommand(platform, project, jobName, boolean sameOrg=false) { project.paths.construct_build_prefix() def getDependenciesCommand = "" if (project.installLibraryDependenciesFromCI) { project.libraryDependencies.each { libraryName -> getDependenciesCommand += auxiliary.getLibrary(libraryName, platform.jenkinsLabel, null, sameOrg) } } if (env.BRANCH_NAME ==~ /PR-\d+/) { if (pullRequest.labels.contains("noSolver")) { project.paths.build_command = project.paths.build_command.replaceAll(' -c', ' -cn') } if (pullRequest.labels.contains("debug")) { project.paths.build_command = project.paths.build_command.replaceAll(' -c', ' -cg') } } String centos = platform.jenkinsLabel.contains('centos7') ? 
'source scl_source enable devtoolset-7' : ':' def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix} ${getDependenciesCommand} ${centos} LD_LIBRARY_PATH=/opt/rocm/lib ${project.paths.build_command} """ platform.runCommand(this, command) } def runTestCommand (platform, project) { String sudo = auxiliary.sudo(platform.jenkinsLabel) String stagingDir = "${project.paths.project_build_prefix}/build/release/clients/staging" if (env.BRANCH_NAME ==~ /PR-\d+/) { if (pullRequest.labels.contains("debug")) { stagingDir = "${project.paths.project_build_prefix}/build/debug/clients/staging" } } String gtestCommonEnv = "HIPBLAS_CLIENT_RAM_GB_LIMIT=95" def command = """#!/usr/bin/env bash set -x cd ${stagingDir} ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${gtestCommonEnv} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipblas-test --gtest_output=xml --gtest_color=yes """ platform.runCommand(this, command) // In an upcoming release, we are replacing hipblasDatatype_t with hipDataType. We have created hipblas_v2-test to test the new // interfaces while hipblasDatatype_t is deprecated. Thus, hipblas-test will be testing the old, deprecated, functions // using hipblasDatatype_t, and hipblas_v2-test will be testing the upcoming interfaces. def v2TestCommand = """#!/usr/bin/env bash set -x cd ${stagingDir} ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${gtestCommonEnv} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipblas_v2-test --gtest_output=xml --gtest_color=yes """ platform.runCommand(this, v2TestCommand) def yamlTestCommand = """#!/usr/bin/env bash set -x cd ${stagingDir} ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${gtestCommonEnv} GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./hipblas-test --gtest_output=xml --gtest_color=yes --yaml hipblas_smoke.yaml """ platform.runCommand(this, yamlTestCommand) junit "${stagingDir}/*.xml" } def runPackageCommand(platform, project, jobName, label='') { def command label = label != '' ? 
'-' + label.toLowerCase() : '' String ext = platform.jenkinsLabel.contains('ubuntu') ? "deb" : "rpm" String dir = jobName.contains('Debug') ? "debug" : "release" if (env.BRANCH_NAME ==~ /PR-\d+/) { if (pullRequest.labels.contains("debug")) { dir = "debug" } } command = """ set -x cd ${project.paths.project_build_prefix}/build/${dir} make package mkdir -p package if [ ! -z "$label" ] then for f in hipblas*.$ext do mv "\$f" "hipblas${label}-\${f#*-}" done fi mv *.${ext} package/ """ platform.runCommand(this, command) platform.archiveArtifacts(this, """${project.paths.project_build_prefix}/build/${dir}/package/*.${ext}""") } return this hipBLAS-rocm-6.4.3/.jenkins/multicompiler.groovy000066400000000000000000000066061500474223100215650ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label-> def prj = new rocProject('hipBLAS', 'MultiCompiler') //customize for project prj.paths.build_command = buildCommand if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains("noSolver")) { prj.libraryDependencies = ['hipBLAS-common', 'hipBLASLt', 'rocBLAS'] } else { prj.libraryDependencies = ['rocPRIM', 'hipBLAS-common', 'hipBLASLt', 'rocBLAS', 'rocSPARSE', 'rocSOLVER'] } // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = './install.sh -c --compiler=g++' String hipClangBuildCommand = './install.sh -c --compiler=/opt/rocm/bin/amdclang++' String clangBuildCommand = './install.sh -c --compiler=clang++' setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') setupCI(urlJobName, jobNameList, hipClangBuildCommand, runCI, 'amdclang++') } hipBLAS-rocm-6.4.3/.jenkins/precheckin-cuda.groovy000066400000000000000000000050621500474223100217200ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label-> def prj = new rocProject('hipBLAS', 'PreCheckin-CUDA') //customize for project prj.paths.build_command = buildCommand prj.libraryDependencies = ['hipBLAS-common'] // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI(['ubuntu20-cuda11':['anycuda']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = [] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = [:] propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = './install.sh -c --compiler=g++ --cuda' setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') } hipBLAS-rocm-6.4.3/.jenkins/precheckin.groovy000066400000000000000000000067631500474223100210170ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName, buildCommand, label-> def prj = new rocProject('hipBLAS', 'PreCheckin') if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains("noSolver")) { prj.libraryDependencies = ['hipBLAS-common', 'hipBLASLt', 'rocBLAS'] } else { prj.libraryDependencies = ['rocPRIM', 'hipBLAS-common', 'hipBLASLt', 'rocBLAS', 'rocSPARSE', 'rocSOLVER'] } if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains('g++')) { buildCommand += ' --compiler=g++' } else if (env.BRANCH_NAME ==~ /PR-\d+/ && pullRequest.labels.contains('clang')) { buildCommand += ' --compiler=clang++' } else { // buildCommand += ' --compiler=amdclang++' # leave as default } //customize for project prj.paths.build_command = buildCommand // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false def commonGroovy def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project, jobName, label) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } def setupCI(urlJobName, jobNameList, buildCommand, runCI, label) { jobNameList = auxiliary.appendJobNameList(jobNameList) jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(label + ' ' + jobName) { runCI(nodeDetails, jobName, buildCommand, label) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(label + ' ' + urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName, buildCommand, label) } } } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), "rocm-docker":([ubuntu18:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx906']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } String hostBuildCommand = './install.sh -c' setupCI(urlJobName, jobNameList, hostBuildCommand, runCI, 'g++') } hipBLAS-rocm-6.4.3/.jenkins/static.groovy000066400000000000000000000051641500474223100201650ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.*
import com.amd.docker.*
import java.nio.file.Path

// Static-library pipeline body for a single job.
//   nodeDetails - node description handed to dockerNodes
//   jobName     - name of the Jenkins job being run
def runCI = { nodeDetails, jobName->

    def hipblasProject = new rocProject('hipBLAS', 'static')

    hipblasProject.paths.build_command = './install.sh -cs --compiler=amdclang++'

    // PR labels are only consulted when the branch name looks like PR-<number>;
    // "noSolver" drops rocPRIM, rocSPARSE and rocSOLVER from the dependency list.
    boolean onPullRequest = (env.BRANCH_NAME ==~ /PR-\d+/)
    if (onPullRequest && pullRequest.labels.contains("noSolver"))
    {
        hipblasProject.libraryDependencies = ['hipBLAS-common', 'hipBLASLt', 'rocBLAS']
    }
    else
    {
        hipblasProject.libraryDependencies = ['rocPRIM', 'hipBLAS-common', 'hipBLASLt', 'rocBLAS', 'rocSPARSE', 'rocSOLVER']
    }

    // Define test architectures, optional rocm version argument is available
    def dockerTargets = new dockerNodes(nodeDetails, jobName, hipblasProject)

    boolean formatCheck = true

    // Loaded by the compile stage and reused by the test and package stages.
    def commonGroovy

    def compileCommand = { platform, project->
        commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy"
        commonGroovy.runCompileCommand(platform, project, jobName, true)
    }

    def testCommand = { platform, project->
        commonGroovy.runTestCommand(platform, project)
    }

    def packageCommand = { platform, project->
        commonGroovy.runPackageCommand(platform, project, jobName)
    }

    buildProject(hipblasProject, formatCheck, dockerTargets.dockerArray, compileCommand, testCommand, packageCommand)
}

ci: {
    String urlJobName = auxiliary.getTopJobName(env.BUILD_URL)

    def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]]
    propertyList = auxiliary.appendPropertyList(propertyList)

    def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900']])]
    jobNameList = auxiliary.appendJobNameList(jobNameList)

    propertyList.each { jobName, property->
        if (urlJobName == jobName)
            properties(auxiliary.addCommonProperties(property))
    }

    jobNameList.each { jobName, nodeDetails->
        if (urlJobName == jobName)
            stage(jobName) {
                runCI(nodeDetails, jobName)
            }
    }

    // For url job names that are not listed by the jobNameList i.e.
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu18:['gfx906']], urlJobName) } } } hipBLAS-rocm-6.4.3/.jenkins/staticanalysis.groovy000066400000000000000000000027401500474223100217260ustar00rootroot00000000000000#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCI = { nodeDetails, jobName-> def prj = new rocProject('hipBLAS', 'Static Analysis') // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = true boolean staticAnalysis = true buildProject(prj, formatCheck, nodes.dockerArray, null, null, null, staticAnalysis) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":[]] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } } hipBLAS-rocm-6.4.3/.readthedocs.yaml000066400000000000000000000005021500474223100171260ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 sphinx: configuration: docs/conf.py formats: [htmlzip, pdf, epub] python: install: - 
requirements: docs/sphinx/requirements.txt build: os: ubuntu-22.04 tools: python: "3.10" hipBLAS-rocm-6.4.3/CHANGELOG.md000066400000000000000000000252031500474223100155150ustar00rootroot00000000000000# Changelog for hipBLAS Documentation for hipBLAS is available at [https://rocm.docs.amd.com/projects/hipBLAS/en/latest/](https://rocm.docs.amd.com/projects/hipBLAS/en/latest/). ## hipBLAS 2.4.0 for ROCm 6.4.0 ### Changed * Updated build dependencies ### Resolved issues * Fixed Windows reference library interface for rocSOLVER functions for hipBLAS clients ## hipBLAS 2.3.0 for ROCm 6.3.0 ### Added * Level 3 functions have an additional ILP64 API for both C and FORTRAN (_64 name suffix) with int64_t function arguments. ### Changed * amdclang is used as the default compiler instead of g++ * Added a dependency on the hipblas-common package ## hipBLAS 2.2.0 for ROCm 6.2.0 ### Additions * Level 2 functions have additional ILP64 API for both C and FORTRAN (_64 name suffix) with int64_t function arguments * Level 1 "_ex" functions have additional ILP64 API ### Changes * install.sh script invokes rmake.py script, along with various improvements within the build scripts * Library dependencies in install.sh script have been changed from "rocblas" and "rocsolver" to the development packages "rocblas-dev" and "rocsolver-dev" * Linux AOCL dependency updated to release 4.2 gcc build * Windows vcpkg dependencies updated to release 2024.02.14 ## hipBLAS 2.1.0 for ROCm 6.1.0 ### Additions * New build option to automatically use hipconfig --platform to determine HIP platform * Level 1 functions have additional ILP64 API for both C and Fortran (`_64` name suffix) with int64_t function arguments * New functions hipblasGetMathMode and hipblasSetMathMode ### Deprecations * USE_CUDA build option; use HIP_PLATFORM=amd or HIP_PLATFORM=nvidia to override hipconfig ### Changes * Some Level 2 function argument names have changed from `m` to `n` to match legacy BLAS; there was no change in 
implementation. * Updated client code to use YAML-based testing * Renamed `.doxygen` and `.sphinx` folders to `doxygen` and `sphinx`, respectively * Added CMake support for documentation ## hipBLAS 2.0.0 for ROCm 6.0.0 ### Additions * New option to define `HIPBLAS_USE_HIP_BFLOAT16` to switch API to use the `hip_bfloat16` type * New `hipblasGemmExWithFlags` API ### Deprecations * `hipblasDatatype_t`; use `hipDataType` instead * `hipblasComplex`; use `hipComplex` instead * `hipblasDoubleComplex`; use `hipDoubleComplex` instead * Use of `hipblasDatatype_t` for `hipblasGemmEx` for compute-type; use `hipblasComputeType_t` instead ### Removals * `hipblasXtrmm` (calculates B <- alpha * op(A) * B) has been replaced with `hipblasXtrmm` (calculates C <- alpha * op(A) * B) ## hipBLAS 1.1.0 for ROCm 5.7.0 ### Changes * Updated documentation requirements ### Dependencies * rocSOLVER now depends on rocSPARSE ## hipBLAS 1.0.0 for ROCm 5.6.0 ### Changes * Added const qualifier to hipBLAS functions (swap, sbmv, spmv, symv, trsm) where missing ### Removals * `hipblasInt8Datatype_t enum` * `hipblasSetInt8Datatype` * `hipblasGetInt8Datatype functions` ### Deprecations * In-place trmm will be replaced by trmm that includes both in-place and out-of-place functionality ## hipBLAS 0.54.0 for ROCm 5.5.0 ### Additions * Optional opt-in to use the `__half for hipblasHalf` type (with C++) when you define `HIPBLAS_USE_HIP_HALF` * added scripts to plot performance for multiple functions * data driven hipblas-bench and hipblas-test execution via external yaml format data files * client smoke test added for quick validation using command hipblas-test --yaml hipblas_smoke.yaml ### Fixes * Data type conversion functions support more rocBLAS and cuBLAS data types * `geqrf` now returns successfully when nullptrs are passed with n == 0 || m == 0 * `getrs` now returns successfully when given nullptrs with corresponding size = 0 * `getrs` gives info = -1 when transpose is not an expected type * `gels` 
now returns successfully when given nullptrs with corresponding size = 0 * `gels` now gives info = -1 when transpose is not in ('N', 'T') for real cases and not in ('N', 'C') for complex cases ### Changes * Changed reference code for Windows to OpenBLAS * hipBLAS client executables all now begin with the `hipblas-` prefix ## hipBLAS 0.53.0 for ROCm 5.4.0 ### Additions * Allow for selection of int8 data type * Added support for `hipblasXgels` and `hipblasXgelsStridedBatched` operations (with s,d,c,z precisions), only supported with rocBLAS backend * Added support for `hipblasXgelsBatched` operations (with s,d,c,z precisions) ## hipBLAS 0.52.0 for ROCm 5.3.0 ### Additions * New `--cudapath` option in `install.sh`, which allows you to specify the CUDA build you want to use * New `--installcuda` option in `install.sh` to install CUDA using a package manager (this can also be used with the new `--installcudaversion` option that allows you to specify the CUDA version you want to install) ### Fixes * `#includes` now support a compiler version * Fixed client dependency support in `install.sh` ## hipBLAS 0.51.0 for ROCm 5.2.0 ### Additions * New packages for test and benchmark executables on all supported operating systems using CPack * Added file and folder reorganization changes with backward compatibility support for `rocm-cmake` wrapper functions * Added user-specified initialization option to `hipblas-bench` ### Fixes * Version gathering in performance-measuring script ## hipBLAS 0.50.0 for ROCm 5.1.0 ### Additions * `hipblas-test` output now has library version and device information * New `--rocsolver-path` command line option that you can use to specify a path (absolute or relative) to the pre-built rocSOLVER * Added `--cmake_install` command line option to update CMake to the minimum version * Added `cmake-arg` parameter to pass in cmake arguments while building * ReadtheDocs infrastructure support for the hipBLAS documentation ### Fixes * Added 
`hipblasVersionMinor` (`hipblaseVersionMinor` remains for backwards compatibility) * Doxygen warnings in `hipblas.h` header file ### Changes * `rocblas-path` command line option can be specified as absolute or relative path * Help message improvements in `install.sh` and `rmake.py` * Updated GoogleTest dependency from 1.10.0 to 1.11.0 ## hipBLAS 0.49.0 for ROCm 5.0.0 ### Additions - `hipblas-bench` rocSOLVER functions - Added `ROCM_MATHLIBS_API_USE_HIP_COMPLEX` to opt-in to use `hipFloatComplex` and `hipDoubleComplex` - Compilation warning for future trmm changes - `hipblas.h` documentation - Added option to forgo pivoting for getrf and getri when ipiv is nullptr - Code coverage option ### Fixes * Use of incorrect `HIP_PATH` when building from source * Windows packaging * Allowing negative increments in `hipblas-bench` * Removed boost dependency ## hipBLAS 0.48.0 for ROCm 4.5.0 ### Additions - Additional support for `hipblas-bench` - `HIPBLAS_STATUS_UNKNOWN` for unsupported backend status codes ### Fixes * Avoid large offset overflow for `gemv` and `hemv` in `hipblas-test` ### Changes * Packaging has been split into a runtime package (`hipblas`) and a development package (`hipblas-devel`): The development package depends on the runtime package. When installing the runtime package, the package manager will suggest the installation of the development package to aid users transitioning from the previous version's combined package. This suggestion by package manager is for all supported operating systems (except CentOS 7) to aid in the transition. The `suggestion` feature in the runtime package is introduced as a deprecated feature and will be removed in a future ROCm release.
## hipBLAS 0.46.0 for ROCm 4.3.0 ### Additions * `hipblasStatusToString` ### Fixes * Added `catch()` blocks around API calls to prevent the leak of C++ exceptions ## hipBLAS 0.44.0 for ROCm 4.2.0 ### Additions * Updates for rocBLAS `gemm_ex` changes: When using the rocBLAS backend, hipBLAS queries the preferable layout of int8 data passed to `gemm_ex` and passes in the resulting flag (you must specify your preferred data format when calling `gemm_ex` with a rocBLAS backend) * Added `hipblas-bench` with support for `copy`, `swap`, and `scal` ## hipBLAS 0.42.0 for ROCm 4.1.0 ### Additions * Added the following functions, which include batched and strided-batched support with the rocBLAS backend: * `axpy_ex` * `dot_ex` * `nrm2_ex` * `rot_ex` * `scal_ex` ### Fixes * Complex unit test bug caused by incorrect `caxpy` and `zaxpy` function signatures ## hipBLAS 0.40.0 for ROCm 4.0.0 ### Additions * Added a changelog * `hipblas-bench`, with support for `gemv`, `trsm`, and `gemm` * rocSOLVER is now a CPack dependency ## hipBLAS 0.38.0 for ROCm 3.10.0 ### Additions * `hipblasSetAtomicsMode` and `hipblasGetAtomicsMode` * Build doesn't look for CUDA backend unless `--cuda` flag is passed ## hipBLAS 0.36.0 for ROCm 3.9.0 ### Additions * Device memory reallocates on demand ## hipBLAS 0.34.0 for ROCm 3.8.0 ### Additions * `--static` build flag allows the creation of a static library ## hipBLAS 0.32.0 for ROCm 3.7.0 ### Additions * `--rocblas-path` command line option to choose path to pre-built rocBLAS * `sgetriBatched` * `dgetriBatched` * `cgetriBatched` * `zgetriBatched` * `TrsmEx` * `TrsmBatchedEx` * `TrsmStridedBatchedEx` * `hipblasSetVectorAsync` * `hipblasGetVectorAsync` * `hipblasSetMatrixAsync` * `hipblasGetMatrixAsync` * Fortran support for `getrf`, `getrs`, `geqrf`, and all variants thereof ## hipBLAS 0.30.0 for ROCm 3.6.0 ### Additions * Added the following functions, which include batched and strided-batched support with the rocBLAS backend: * `stbsv`, `dtbsv`, 
`ctbsv`, `ztbsv` * `ssymm`, `dsymm`, `csymm`, `zsymm` * `cgeam`, `zgeam` * `chemm`, `zhemm` * `strtri`, `dtrtri`, `ctrtri`, `ztrtri * `sdgmm`, `ddgmm`, `cdgmm`, `zdgmm` * `GemmBatchedEx` and `GemmStridedBatchedEx` * Fortran support for BLAS functions ## hipBLAS 0.28.0 for ROCm 3.5.0 ### Additions * Added the following functions, which include batched and strided-batched support with the rocBLAS backend: * `sgbmv`, `dgbmv`, `cgbmv`, `zgbmv` * `chemv`, `zhemv` * `stbmv`, `dtbmv`, `ctbmv`, `ztbmv` * `strmv`, `trmv`, `ctrmv`, `ztrmv` * `chbmv`, `zhbmv` * `cher`, `zher` * `cher2`, `zher2` * `chpmv`, `zhpmv` * `chpr`, `zhpr` * `chpr2`, `zhpr2` * `ssbmv`, `dsbmv` * `sspmv`, `dspmv` * `ssymv`, `dsymv`, `csymv`, `zsymv` * `stpmv`, `dtpmv`, `ctpmv`, `ztpmv` * `cgeru`, `cgerc`, `zgeru`, `zgerc` * `sspr`, `dspr`, `cspr`, `zspr` * `sspr2`, `dspr2` * `csyr`, `zsyr` * `ssyr2`, `dsyr2`, `csyr2`, `zsyr2` * `stpsv`, `dtpsv`, `ctpsv`, `ztpsv` * `ctrsv`, `ztrsv` * `cherk`, `zherk` * `cherkx`, `zherkx` * `cher2k`, `zher2k` * `ssyrk`, `dsyrk`, `csyrk`, `zsyrk` * `ssyr2k`, `dsyr2k`, `csyr2k`, `zsyr2k` * `ssyrkx`, `dsyrkx`, `csyrkx`, `zsyrkx` * `ctrmm`, `ztrmm` * `ctrsm`, `ztrsm` hipBLAS-rocm-6.4.3/CMakeLists.txt000066400000000000000000000263421500474223100164510ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. 
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
# ies of the Software, and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
# PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
# CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# ########################################################################

# ROCm software requires Ubuntu 16.04 or Fedora 24, which has cmake 3.5
# NOTE(review): this file later calls add_link_options() (when BUILD_CODE_COVERAGE is ON),
# which is documented as new in CMake 3.13 - confirm whether the minimum should be raised.
cmake_minimum_required(VERSION 3.5)

# The library is built as C++17.
set(CMAKE_CXX_STANDARD 17)

# Build documentation
option(BUILD_DOCS "Build documentation" OFF)

# Consider removing this in the future
# This should appear before the project command, because it does not use FORCE
if(WIN32)
    set(CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories")
else()
    set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories")
endif()

# This has to be initialized before the project() command appears
# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D.
# MSVC_IDE does not use CMAKE_BUILD_TYPE
if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE )
  set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." )
endif()

# Fortran is only enabled off Windows; gfortran is the fallback compiler when neither
# CMAKE_Fortran_COMPILER nor the FC environment variable is provided.
if (NOT WIN32)
    if ( NOT DEFINED CMAKE_Fortran_COMPILER AND NOT DEFINED ENV{FC} )
      set( CMAKE_Fortran_COMPILER "gfortran" )
    endif()
    set( fortran_language "Fortran" )
endif( )

project( hipblas LANGUAGES CXX ${fortran_language} )

if (NOT python)
    set(python "python3") # default for linux
endif()

# Append our library helper cmake path and the cmake path for hip (for convenience)
# Users may override HIP path by specifying their own in CMAKE_MODULE_PATH
list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${ROCM_PATH}/lib/cmake/hip /opt/rocm/lib/cmake/hip ${HIP_DIR}/cmake )

# NOTE: workaround until hip cmake modules fixes symlink logic in their config files; remove when fixed
list( APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/lib/cmake/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip )

# This finds the rocm-cmake project, and installs it if not found
# rocm-cmake contains common cmake code for rocm projects to help setup and install
include(dependencies)

set ( VERSION_STRING "2.4.0" )
rocm_setup_version( VERSION ${VERSION_STRING} )

option( BUILD_VERBOSE "Output additional build information" OFF )

option( BUILD_WITH_SOLVER "Add additional functions from rocSOLVER" ON )

if( BUILD_WITH_SOLVER )
  add_definitions( -D__HIP_PLATFORM_SOLVER__ )
endif( )

# BUILD_SHARED_LIBS is a cmake built-in; we make it an explicit option such that it shows in cmake-gui
option( BUILD_SHARED_LIBS "Build hipBLAS as a shared library" ON )

# Deprecated USE_CUDA option: map it onto the HIP_PLATFORM environment variable and warn.
# Both branches must use the DEPRECATION message mode. Any other first word (previously
# "DEPRECATED") is not a mode keyword, so it is concatenated into the message text and
# the message is emitted as a plain notice instead of a deprecation warning.
if(DEFINED USE_CUDA)
    if(USE_CUDA)
        set(ENV{HIP_PLATFORM} nvidia)
        message(DEPRECATION "USE_CUDA is deprecated (use environment variable HIP_PLATFORM=nvidia)")
    else()
        set(ENV{HIP_PLATFORM} amd)
        message(DEPRECATION "USE_CUDA is deprecated (use environment variable HIP_PLATFORM=amd)")
    endif()
endif()

# Hip headers required of all clients; clients use hip to allocate device memory
find_package(hip CONFIG PATHS ${HIP_DIR} ${ROCM_PATH} /opt/rocm)

# support for cuda backend with hip < 6.0
# When the hip config package is not found, fall back to the HIP find-module and default
# the platform to nvidia; otherwise default it to amd. An already-set HIP_PLATFORM is kept.
if(NOT hip_FOUND)
    find_package(HIP MODULE REQUIRED)
    list(APPEND HIP_INCLUDE_DIRS "${HIP_ROOT_DIR}/include")
    set(ENV{HIP_PLATFORM} nvidia)
    if(NOT HIP_PLATFORM)
        set(HIP_PLATFORM nvidia)
    endif()
else()
    if(NOT HIP_PLATFORM)
        set(HIP_PLATFORM amd)
    endif()
endif()

if(HIP_PLATFORM STREQUAL nvidia)
    find_package(CUDA REQUIRED)
    set(HIPBLAS_HIP_PLATFORM_COMPILER_DEFINES __HIP_PLATFORM_NVCC__ __HIP_PLATFORM_NVIDIA__)
endif()

# Optional gcov-style instrumentation of every target
option(BUILD_CODE_COVERAGE "Build with code coverage enabled" OFF)
if(BUILD_CODE_COVERAGE)
    add_compile_options(-fprofile-arcs -ftest-coverage)
    add_link_options(--coverage)
endif()

# Optional AddressSanitizer build (flags appended for both C++ and C)
option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF)
if(BUILD_ADDRESS_SANITIZER)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan")
endif()

# FOR HANDLING ENABLE/DISABLE OPTIONAL BACKWARD COMPATIBILITY for FILE/FOLDER REORG
option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" OFF)
if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY AND NOT WIN32)
    rocm_wrap_header_dir(
        ${CMAKE_SOURCE_DIR}/library/include
        PATTERNS "*.h"
        GUARDS SYMLINK WRAPPER
        WRAPPER_LOCATIONS ${CMAKE_INSTALL_INCLUDEDIR}
    )
endif()

add_subdirectory(library)

include(clients/cmake/build-options.cmake)

# Build clients of the library
if(BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS)
    # Detect the distribution unless the caller already supplied CLIENTS_OS
    if(NOT CLIENTS_OS)
        rocm_set_os_id(CLIENTS_OS)
        string(TOLOWER "${CLIENTS_OS}" CLIENTS_OS)
        rocm_read_os_release(CLIENTS_OS_VERSION VERSION_ID)
    endif()
    message(STATUS "OS: ${CLIENTS_OS} ${CLIENTS_OS_VERSION}")

    # Default gfortran runtime package names; adjusted per distribution below
    set(GFORTRAN_RPM "libgfortran4")
    set(GFORTRAN_DEB "libgfortran4")
    if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel")
if(CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "8") set(GFORTRAN_RPM "libgfortran") endif() elseif(CLIENTS_OS STREQUAL "ubuntu" AND CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "20.04") set(GFORTRAN_DEB "libgfortran5") elseif(CLIENTS_OS STREQUAL "mariner" OR CLIENTS_OS STREQUAL "azurelinux") set(GFORTRAN_RPM "gfortran") endif() set( BUILD_CLIENTS ON ) rocm_package_setup_component(clients) rocm_package_setup_client_component(clients-common) if(BUILD_CLIENTS_TESTS) rocm_package_setup_client_component( tests DEPENDS COMPONENT clients-common DEB "${GFORTRAN_DEB}" RPM "${GFORTRAN_RPM}") endif() if(BUILD_CLIENTS_BENCHMARKS) rocm_package_setup_client_component( benchmarks DEPENDS COMPONENT clients-common DEB "${GFORTRAN_DEB}" RPM "${GFORTRAN_RPM}") endif() if(BUILD_CLIENTS_SAMPLES) rocm_package_setup_client_component( samples DEPENDS COMPONENT clients-common DEB "${GFORTRAN_DEB}" RPM "${GFORTRAN_RPM}") endif() add_subdirectory( clients ) endif( ) # Build docs if(BUILD_DOCS) add_subdirectory(docs) endif() # The following code is setting variables to control the behavior of CPack to generate our if( WIN32 ) set( CPACK_SOURCE_GENERATOR "ZIP" ) set( CPACK_GENERATOR "ZIP" ) endif( ) # Package specific CPACK vars if(HIP_PLATFORM STREQUAL amd) set(rocblas_minimum 4.4.0) set(rocsolver_minimum 3.28.0) rocm_package_add_dependencies(SHARED_DEPENDS "rocblas >= ${rocblas_minimum}" "rocsolver >= ${rocsolver_minimum}") rocm_package_add_rpm_dependencies(STATIC_DEPENDS "rocblas-static-devel >= ${rocblas_minimum}" "rocsolver-static-devel >= ${rocsolver_minimum}") rocm_package_add_deb_dependencies(STATIC_DEPENDS "rocblas-static-dev >= ${rocblas_minimum}" "rocsolver-static-dev >= ${rocsolver_minimum}") endif( ) set(hipblas_common_minimum 1.0.0) rocm_package_add_deb_dependencies(COMPONENT devel DEPENDS "hipblas-common-dev >= ${hipblas_common_minimum}") rocm_package_add_rpm_dependencies(COMPONENT devel DEPENDS "hipblas-common-devel >= ${hipblas_common_minimum}") set( CPACK_RESOURCE_FILE_LICENSE 
"${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" ) set( CPACK_RPM_PACKAGE_LICENSE "MIT") if (WIN32) SET( CMAKE_INSTALL_PREFIX "C:/hipSDK" CACHE PATH "Install path" FORCE ) SET( INSTALL_PREFIX "C:/hipSDK" ) SET( CPACK_SET_DESTDIR FALSE ) SET( CPACK_PACKAGE_INSTALL_DIRECTORY "C:/hipSDK" ) SET( CPACK_PACKAGING_INSTALL_PREFIX "" ) set( CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF ) else() if( NOT CPACK_PACKAGING_INSTALL_PREFIX ) set( CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" ) endif() endif( ) set( CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}" ) # Give hipblas compiled for CUDA backend a different name if(HIP_PLATFORM STREQUAL amd) set( package_name hipblas ) else( ) set( package_name hipblas-alt ) endif( ) set( HIPBLAS_CONFIG_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" CACHE PATH "Path placed into ldconfig file" ) rocm_create_package( NAME ${package_name} DESCRIPTION "ROCm BLAS marshalling library" MAINTAINER "hipBLAS Maintainer " LDCONFIG LDCONFIG_DIR ${HIPBLAS_CONFIG_DIR} ) # # ADDITIONAL TARGETS FOR CODE COVERAGE # if(BUILD_CODE_COVERAGE) # # > make coverage_cleanup (clean coverage related files.) # > make coverage GTEST_FILTER=<> # will run: # > make coverage_analysis GTEST_FILTER=<> (analyze tests) # > make coverage_output (generate html documentation) # # # Run coverage analysis # set(coverage_test ./clients/staging/hipblas-test) if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(coverage_test ./clients/staging/hipblas-test-d) endif() add_custom_target(coverage_analysis COMMAND echo Coverage GTEST_FILTER=\${GTEST_FILTER} COMMAND ${coverage_test} --gtest_filter=\"\${GTEST_FILTER}\" WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) add_dependencies(coverage_analysis hipblas) # # Prepare coverage output # This little script is generated because the option '--gcov-tool ' of lcov cannot take arguments. 
# add_custom_target(coverage_output DEPENDS coverage_analysis COMMAND mkdir -p lcoverage COMMAND echo "\\#!/bin/bash" > llvm-gcov.sh COMMAND echo "\\# THIS FILE HAS BEEN GENERATED" >> llvm-gcov.sh COMMAND printf "exec /opt/rocm/llvm/bin/llvm-cov gcov $$\\@" >> llvm-gcov.sh COMMAND chmod +x llvm-gcov.sh ) # # Generate coverage output. # add_custom_command(TARGET coverage_output COMMAND lcov --directory . --base-directory . --gcov-tool ${CMAKE_BINARY_DIR}/llvm-gcov.sh --capture -o lcoverage/raw_main_coverage.info COMMAND lcov --remove lcoverage/raw_main_coverage.info "'/opt/*'" "'/usr/*'" -o lcoverage/main_coverage.info COMMAND genhtml lcoverage/main_coverage.info --output-directory lcoverage ) add_custom_target(coverage DEPENDS coverage_output) # # Coverage cleanup # add_custom_target(coverage_cleanup COMMAND find ${CMAKE_BINARY_DIR} -name *.gcda -delete WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) endif() hipBLAS-rocm-6.4.3/LICENSE.md000066400000000000000000000066611500474223100153170ustar00rootroot00000000000000MIT License Copyright (C) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. This product includes software from copyright holders as shown below, and distributed under their license terms as specified. Copyright (c) 1992-2022 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. Copyright (c) 2000-2022 The University of California Berkeley. All rights reserved. Copyright (c) 2006-2022 The University of Colorado Denver. All rights reserved. $COPYRIGHT$ Additional copyrights may follow $HEADER$ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer listed in this license in the documentation and/or other materials provided with the distribution. - Neither the name of the copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. The copyright holders provide no reassurances that the source code provided does not infringe any patent, copyright, or any other intellectual property rights of third parties. The copyright holders disclaim any liability to any recipient for claims brought against recipient by any third party for infringement of that parties intellectual property rights. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. hipBLAS-rocm-6.4.3/README.md000066400000000000000000000061041500474223100151620ustar00rootroot00000000000000# hipBLAS hipBLAS is a Basic Linear Algebra Subprograms (BLAS) marshalling library with multiple supported backends. It sits between your application and a 'worker' BLAS library, where it marshals inputs to the backend library and marshals results to your application. hipBLAS exports an interface that doesn't require the client to change, regardless of the chosen backend. Currently, hipBLAS supports rocBLAS and cuBLAS backends. To use hipBLAS, you must first install rocBLAS, rocSPARSE, and rocSOLVER or cuBLAS. ## Documentation > [!NOTE] > The published hipBLAS documentation is available at [hipBLAS](https://rocm.docs.amd.com/projects/hipBLAS/en/latest/index.html) in an organized, easy-to-read format, with search and a table of contents. The documentation source files reside in the hipBLAS/docs folder of this repository. As with all ROCm projects, the documentation is open source. For more information, see [Contribute to ROCm documentation](https://rocm.docs.amd.com/en/latest/contribute/contributing.html). 
To build our documentation locally, use the following code: ```bash cd docs pip3 install -r sphinx/requirements.txt python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html ``` Alternatively, build with CMake: ```bash cmake -DBUILD_DOCS=ON ... ``` ## Build and install 1. Download the hipBLAS source code (clone this repository): ```bash git clone https://github.com/ROCmSoftwarePlatform/hipBLAS.git ``` ```note hipBLAS requires specific versions of rocBLAS and rocSOLVER. Refer to [CMakeLists.txt](https://github.com/ROCmSoftwarePlatform/hipBLAS/blob/develop/library/CMakeLists.txt) for details. ``` 2. Build hipBLAS and install it into `/opt/rocm/hipblas`: ```bash cd hipBLAS ./install.sh -i ``` ## Interface examples The hipBLAS interface is compatible with rocBLAS and cuBLAS-v2 APIs. Porting a CUDA application that originally calls the cuBLAS API to an application that calls the hipBLAS API is relatively straightforward. For example, the hipBLAS SGEMV interface is: ### GEMV API ```c hipblasStatus_t hipblasSgemv( hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, const float *A, int lda, const float *x, int incx, const float *beta, float *y, int incy ); ``` ### Batched and strided GEMM API hipBLAS GEMM can process matrices in batches with regular strides by using the strided-batched version of the API: ```c hipblasStatus_t hipblasSgemmStridedBatched( hipblasHandle_t handle, hipblasOperation_t transa, hipblasOperation_t transb, int m, int n, int k, const float *alpha, const float *A, int lda, long long bsa, const float *B, int ldb, long long bsb, const float *beta, float *C, int ldc, long long bsc, int batchCount); ``` hipBLAS assumes matrix A and vectors x, y are allocated in GPU memory space filled with data. You are responsible for copying data to and from the host and device memory.
hipBLAS-rocm-6.4.3/bump_develop_version.sh000077500000000000000000000013211500474223100204640ustar00rootroot00000000000000
#!/bin/bash

# For the develop branch, bump hipblas version and rocblas/rocsolver dependency versions.
# The file paths below are relative to the current working directory.

OLD_HIPBLAS_VERSION="2.3.0"
NEW_HIPBLAS_VERSION="2.4.0"

OLD_MINIMUM_ROCBLAS_VERSION="4.3.0"
NEW_MINIMUM_ROCBLAS_VERSION="4.4.0"

OLD_MINIMUM_ROCSOLVER_VERSION="3.27.0"
NEW_MINIMUM_ROCSOLVER_VERSION="3.28.0"

OLD_SO_VERSION="hipblas_SOVERSION 2.3"
NEW_SO_VERSION="hipblas_SOVERSION 2.4"

# replace_in_file OLD NEW FILE
# Rewrites FILE in place, replacing every occurrence of OLD with NEW.
replace_in_file() {
    # sed reads OLD as a regular expression, where an unescaped '.' matches any
    # character; escape the dots so only the exact version text is replaced.
    local old_pattern="${1//./\\.}"
    sed -i "s/${old_pattern}/${2}/g" "$3"
}

replace_in_file "${OLD_HIPBLAS_VERSION}" "${NEW_HIPBLAS_VERSION}" CMakeLists.txt
replace_in_file "${OLD_MINIMUM_ROCBLAS_VERSION}" "${NEW_MINIMUM_ROCBLAS_VERSION}" CMakeLists.txt
replace_in_file "${OLD_MINIMUM_ROCSOLVER_VERSION}" "${NEW_MINIMUM_ROCSOLVER_VERSION}" CMakeLists.txt
replace_in_file "${OLD_SO_VERSION}" "${NEW_SO_VERSION}" library/CMakeLists.txt
hipBLAS-rocm-6.4.3/clients/000077500000000000000000000000001500474223100153435ustar00rootroot00000000000000
hipBLAS-rocm-6.4.3/clients/CMakeLists.txt000066400000000000000000000272161500474223100201130ustar00rootroot00000000000000
# ########################################################################
# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
# ies of the Software, and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
# PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
# CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# ########################################################################

# ROCm software requires Ubuntu 16.04 or Fedora 24, which has cmake 3.5
cmake_minimum_required( VERSION 3.5 )

# get_lapack( <lapack_lib> <lapack_inc> )
#   Locates the reference LAPACK library.
#   <lapack_lib>  name of the caller's variable that receives the library to link:
#                 the static archive under ${BUILD_DIR}/deps/deps-install when it
#                 exists, otherwise "lapack" after a REQUIRED config-mode find_package.
#   <lapack_inc>  name of the caller's variable that receives the matching include
#                 directory; only written when the deps-install archive is used.
function( get_lapack lapack_lib lapack_inc )
  if(EXISTS "${BUILD_DIR}/deps/deps-install/lib/liblapack.a")
    set( lib "${BUILD_DIR}/deps/deps-install/lib/liblapack.a" )
    set( inc "${BUILD_DIR}/deps/deps-install/include" )
    # Write through this function's own output parameter; `cblas_inc` is not a
    # parameter here, so using it left the caller's variable unset.
    set( ${lapack_inc} ${inc} PARENT_SCOPE )
  else()
    find_package( lapack REQUIRED CONFIG )
    set( lib "lapack" )
  endif()
  set( ${lapack_lib} ${lib} PARENT_SCOPE )
endfunction( )

# get_cblas( <cblas_libs> <cblas_inc> )
#   Locates the reference CBLAS/BLAS libraries.
#   <cblas_libs>  name of the caller's variable that receives the libraries to link.
#   <cblas_inc>   name of the caller's variable that receives the include directory;
#                 only written when a prebuilt archive is found on disk.
#   With LINK_BLIS set, libblis.a is looked up under ${BUILD_DIR}/deps/blis and then
#   /usr/local; otherwise the deps-install libcblas.a + libblas.a pair is used, with
#   a REQUIRED config-mode find_package( cblas ) as the fallback.
#   NOTE(review): with LINK_BLIS set and no libblis.a in either location, <cblas_libs>
#   is set from an undefined `libs` — confirm that the callers tolerate this.
function( get_cblas cblas_libs cblas_inc )
  if(LINK_BLIS)
    if(EXISTS "${BUILD_DIR}/deps/blis/lib/libblis.a")
      set( libs ${BUILD_DIR}/deps/blis/lib/libblis.a )
      set( inc ${BUILD_DIR}/deps/blis/include/blis )
      set( ${cblas_inc} ${inc} PARENT_SCOPE )
    elseif(EXISTS "/usr/local/lib/libblis.a")
      set( libs /usr/local/lib/libblis.a )
      set( inc /usr/local/include/blis )
      set( ${cblas_inc} ${inc} PARENT_SCOPE )
    endif()
  else()
    if(EXISTS "${BUILD_DIR}/deps/deps-install/lib/libcblas.a" AND EXISTS "${BUILD_DIR}/deps/deps-install/lib/libblas.a")
      set( libs ${BUILD_DIR}/deps/deps-install/lib/libcblas.a ${BUILD_DIR}/deps/deps-install/lib/libblas.a )
      set( inc "${BUILD_DIR}/deps/deps-install/include" )
      set( ${cblas_inc} ${inc} PARENT_SCOPE )
    else()
      find_package( cblas REQUIRED CONFIG )
      set( libs cblas blas )
    endif()
  endif()
  set( ${cblas_libs} ${libs} PARENT_SCOPE )
endfunction( )

# Consider removing this in the future
# This should appear before the project
# command, because it does not use FORCE
if( WIN32 )
  set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" )
else( )
  set( CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories" )
endif( )

# This has to be initialized before the project() command appears
# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE
if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE )
  set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." )
endif()

# This project may compile dependencies for clients
project( hipblas-clients LANGUAGES CXX Fortran )

# Request the C++17 standard for the client targets
set( CMAKE_CXX_STANDARD 17 )

list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake )

include( build-options )

# Fortran interface modules compiled into the client helper library (non-Windows only)
if( NOT WIN32 )
    set(hipblas_f90_source_clients_no_solver
      include/hipblas_fortran_no_solver_module.f90
    )

    set(hipblas_f90_source_clients_solver
      include/hipblas_fortran_module.f90
    )
endif()

if( BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_SAMPLES )
  if( NOT WIN32 )
    # Pick the Fortran module that matches whether solver support is being built
    if( BUILD_WITH_SOLVER )
      add_library(hipblas_fortran_client STATIC ${hipblas_f90_source_clients_solver})
    else()
      add_library(hipblas_fortran_client STATIC ${hipblas_f90_source_clients_no_solver})
    endif()
    add_dependencies(hipblas_fortran_client hipblas_fortran)
  endif()
  include_directories(${CMAKE_BINARY_DIR}/include/hipblas)
  include_directories(${CMAKE_BINARY_DIR}/include)
endif( )

if( BUILD_CLIENTS_SAMPLES )
  add_subdirectory( samples )
endif( )

if( BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_TESTS)
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  find_package(Threads REQUIRED)

  # if it fails to find OpenMP compile and link flags in strange configurations it can just use non-parallel reference computation
  # if there is no omp.h to find the client compilation will fail and this should be obvious, used to be REQUIRED
  find_package(OpenMP)

  if (TARGET OpenMP::OpenMP_CXX)
    set( COMMON_LINK_LIBS "OpenMP::OpenMP_CXX")
    if(HIP_PLATFORM STREQUAL amd)
      list( APPEND COMMON_LINK_LIBS "-L\"${HIP_CLANG_ROOT}/lib\"")
      if (NOT WIN32)
        list( APPEND COMMON_LINK_LIBS "-Wl,-rpath=${HIP_CLANG_ROOT}/lib -lomp")
      else()
        list( APPEND COMMON_LINK_LIBS "libomp")
      endif()
    endif()
  endif()

  if (TARGET Threads::Threads)
    list( APPEND COMMON_LINK_LIBS "Threads::Threads")
  endif()

  set( COMMON_DEFINES HIPBLAS_BFLOAT16_CLASS ROCM_USE_FLOAT16 HIPBLAS_NO_DEPRECATED_WARNINGS ${HIPBLAS_HIP_PLATFORM_COMPILER_DEFINES} )

  message(STATUS "CLIENT COMMON_DEFINES: ${COMMON_DEFINES}")
  message(STATUS "CLIENT COMMON CXX_OPTIONS: ${COMMON_CXX_OPTIONS}")
  message(STATUS "CLIENT COMMON LINK: ${COMMON_LINK_LIBS}")

  # Select the host reference BLAS/LAPACK used by the clients.
  # The probes below are tried in order and the first match wins.
  # NOTE(review): list( PREPEND ) was added in CMake 3.15, while this file declares
  # cmake_minimum_required( VERSION 3.5 ) — confirm the real minimum version.
  if( NOT WIN32 )
    if (LINK_BLIS)
      if(EXISTS "${BUILD_DIR}/deps/amd-blis/lib/ILP64/libflame.a" AND EXISTS "${BUILD_DIR}/deps/amd-blis/lib/ILP64/libblis-mt.a") # 4.0 and 4.1.0
        set( BLAS_LIBRARY ${BUILD_DIR}/deps/amd-blis/lib/ILP64/libflame.a ${BUILD_DIR}/deps/amd-blis/lib/ILP64/libblis-mt.a )
        set( BLIS_INCLUDE_DIR ${BUILD_DIR}/deps/amd-blis/include/ILP64 )
        list( APPEND COMMON_DEFINES "FLA_ENABLE_ILP64")
      elseif(EXISTS "/opt/AMD/aocl/aocl-linux-gcc-4.2.0/gcc/lib_ILP64/libflame.a" AND EXISTS "/opt/AMD/aocl/aocl-linux-gcc-4.2.0/gcc/lib_ILP64/libblis-mt.a" AND EXISTS "/opt/AMD/aocl/aocl-linux-gcc-4.2.0/gcc/lib_ILP64/libaoclutils.a" )
        set( BLAS_LIBRARY -Wl,--allow-multiple-definition /opt/AMD/aocl/aocl-linux-gcc-4.2.0/gcc/lib_ILP64/libflame.a /opt/AMD/aocl/aocl-linux-gcc-4.2.0/gcc/lib_ILP64/libblis-mt.a /opt/AMD/aocl/aocl-linux-gcc-4.2.0/gcc/lib_ILP64/libaoclutils.a )
        set( BLIS_INCLUDE_DIR /opt/AMD/aocl/aocl-linux-gcc-4.2.0/gcc/include_ILP64/ )
        list( APPEND COMMON_DEFINES "FLA_ENABLE_ILP64")
      elseif(EXISTS "/opt/AMD/aocl/aocl-linux-aocc-4.1.0/aocc/lib_ILP64/libflame.a" AND EXISTS "/opt/AMD/aocl/aocl-linux-aocc-4.1.0/aocc/lib_ILP64/libblis-mt.a" )
        set( BLAS_LIBRARY /opt/AMD/aocl/aocl-linux-aocc-4.1.0/aocc/lib_ILP64/libflame.a /opt/AMD/aocl/aocl-linux-aocc-4.1.0/aocc/lib_ILP64/libblis-mt.a )
        set( BLIS_INCLUDE_DIR /opt/AMD/aocl/aocl-linux-aocc-4.1.0/aocc/include_ILP64/ )
        list( APPEND COMMON_DEFINES "FLA_ENABLE_ILP64")
      elseif(EXISTS "/opt/AMD/aocl/aocl-linux-aocc-4.0/lib_ILP64/libflame.a" AND EXISTS "/opt/AMD/aocl/aocl-linux-aocc-4.0/lib_ILP64/libblis-mt.a" )
        set( BLAS_LIBRARY /opt/AMD/aocl/aocl-linux-aocc-4.0/lib_ILP64/libflame.a /opt/AMD/aocl/aocl-linux-aocc-4.0/lib_ILP64/libblis-mt.a )
        set( BLIS_INCLUDE_DIR /opt/AMD/aocl/aocl-linux-aocc-4.0/include_ILP64/ )
        list( APPEND COMMON_DEFINES "FLA_ENABLE_ILP64")
      else() # fallbacks include earlier blis
        get_lapack( LAPACK_LIB LAPACK_INCLUDE_DIR )
        get_cblas( BLAS_LIBRARY BLAS_INCLUDE_DIR )
        list( PREPEND BLAS_LIBRARY ${LAPACK_LIB} )
      endif()
    else()
      # Linking lapack library requires fortran flags
      get_lapack( LAPACK_LIB LAPACK_INCLUDE_DIR )
      get_cblas( BLAS_LIBRARY BLAS_INCLUDE_DIR )
      list( PREPEND BLAS_LIBRARY ${LAPACK_LIB} )
    endif()
  else() # WIN32
    # NOTE(review): AOCL_ROOT is computed here but the paths below are spelled out
    # in full rather than derived from it — confirm whether it is used elsewhere.
    file(TO_CMAKE_PATH "C:/Program\ Files/AMD/AOCL-Windows" AOCL_ROOT)
    if (LINK_BLIS AND EXISTS "C:/Program\ Files/AMD/AOCL-Windows/amd-blis/lib/ILP64/AOCL-LibBlis-Win-MT.lib" AND EXISTS "C:/Program\ Files/AMD/AOCL-Windows/amd-libflame/lib/ILP64/AOCL-LibFlame-Win-MT.lib" AND EXISTS "C:/Program\ Files/AMD/AOCL-Windows/amd-utils/lib/libaoclutils_static.lib" )
      set( BLAS_LIBRARY "-l\"C:/Program\ Files/AMD/AOCL-Windows/amd-blis/lib/ILP64/AOCL-LibBlis-Win-MT\"" "-l\"C:/Program\ Files/AMD/AOCL-Windows/amd-libflame/lib/ILP64/AOCL-LibFlame-Win-MT\"" "-l\"C:/Program\ Files/AMD/AOCL-Windows/amd-utils/lib/libaoclutils_static\"" )
      set( BLIS_INCLUDE_DIR "C:/Program\ Files/AMD/AOCL-Windows/amd-blis/include/ILP64" )
      set( FLAME_INCLUDE_DIR "C:/Program\ Files/AMD/AOCL-Windows/amd-libflame/include/ILP64" )
      set( BLIS_DEFINES BLIS_ENABLE_NO_UNDERSCORE_API BLIS_ENABLE_CBLAS )
    else()
      # Fall back to OpenBLAS: first a library under OPENBLAS_DIR, then a config package
      set( BLAS_INCLUDE_DIR ${OPENBLAS_DIR}/include CACHE PATH "OpenBLAS library include path" )
      find_library( BLAS_LIBRARY libopenblas PATHS ${OPENBLAS_DIR}/lib NO_DEFAULT_PATH )
      if (NOT BLAS_LIBRARY)
        find_package( OPENBLAS CONFIG REQUIRED )
        set( BLAS_LIBRARY OpenBLAS::OpenBLAS )
        set( BLAS_INCLUDE_DIR "" )
      endif()
    endif()
  endif()

  # The BLIS interface source is only added when a BLIS include directory was chosen above
  if ( DEFINED BLIS_INCLUDE_DIR )
    set( BLIS_CPP ../common/blis_interface.cpp )
  endif()

  message(STATUS "Build Dir: ${BUILD_DIR}")
  message(STATUS "Linking Ref. Libs: ${BLAS_LIBRARY}")

  if( BUILD_CLIENTS_TESTS )
    add_subdirectory( gtest )
  endif( )

  if( BUILD_CLIENTS_BENCHMARKS )
    add_subdirectory( benchmarks )
  endif( )
endif()

# Copy the YAML test data and the test generator script into the staging directory
set( HIPBLAS_COMMON "${PROJECT_BINARY_DIR}/staging/hipblas_common.yaml")
add_custom_command( OUTPUT "${HIPBLAS_COMMON}"
                    COMMAND ${CMAKE_COMMAND} -E copy include/hipblas_common.yaml "${HIPBLAS_COMMON}"
                    DEPENDS include/hipblas_common.yaml
                    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" )

set( HIPBLAS_TEMPLATE "${PROJECT_BINARY_DIR}/staging/hipblas_template.yaml")
add_custom_command( OUTPUT "${HIPBLAS_TEMPLATE}"
                    COMMAND ${CMAKE_COMMAND} -E copy include/hipblas_template.yaml "${HIPBLAS_TEMPLATE}"
                    DEPENDS include/hipblas_template.yaml
                    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" )

set( HIPBLAS_SMOKE "${PROJECT_BINARY_DIR}/staging/hipblas_smoke.yaml")
add_custom_command( OUTPUT "${HIPBLAS_SMOKE}"
                    COMMAND ${CMAKE_COMMAND} -E copy include/hipblas_smoke.yaml "${HIPBLAS_SMOKE}"
                    DEPENDS include/hipblas_smoke.yaml
                    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" )

set( HIPBLAS_GENTEST "${PROJECT_BINARY_DIR}/staging/hipblas_gentest.py")
add_custom_command( OUTPUT "${HIPBLAS_GENTEST}"
                    COMMAND ${CMAKE_COMMAND} -E copy common/hipblas_gentest.py "${HIPBLAS_GENTEST}"
                    DEPENDS common/hipblas_gentest.py
                    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" )

# Aggregate target: building it produces all four staged files above
add_custom_target( hipblas-common DEPENDS "${HIPBLAS_COMMON}" "${HIPBLAS_TEMPLATE}" "${HIPBLAS_SMOKE}" "${HIPBLAS_GENTEST}" )

if( BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
  rocm_install( FILES ${HIPBLAS_COMMON} ${HIPBLAS_TEMPLATE} ${HIPBLAS_SMOKE} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clients-common )
  rocm_install( PROGRAMS ${HIPBLAS_GENTEST} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clients-common )
endif()
hipBLAS-rocm-6.4.3/clients/benchmarks/000077500000000000000000000000001500474223100174605ustar00rootroot00000000000000
hipBLAS-rocm-6.4.3/clients/benchmarks/CMakeLists.txt000066400000000000000000000131651500474223100222260ustar00rootroot00000000000000
# ########################################################################
# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
# ies of the Software, and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
# PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
# CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# ########################################################################

# Builds the two benchmark executables, hipblas-bench and hipblas_v2-bench, from the
# same sources; the _v2 target additionally compiles with the HIPBLAS_V2 definition.
# NOTE(review): the generator expressions in this copy appear truncated by text
# extraction (bare `$` arguments and `$<$:...>`); restore them from the upstream file.

set( THREADS_PREFER_PTHREAD_FLAG ON )
find_package( Threads REQUIRED )

# Linking lapack library requires fortran flags
enable_language( Fortran )

set(hipblas_bench_source client.cpp)

# When built standalone (no in-tree hipblas target) use an installed hipblas package
if( NOT TARGET hipblas )
  find_package( hipblas REQUIRED CONFIG PATHS /opt/rocm/hipblas )
endif( )

# Client support sources shared by both executables
set( hipblas_benchmark_common
      ../common/utility.cpp
      ../common/cblas_interface.cpp
      ../common/clients_common.cpp
      ../common/hipblas_arguments.cpp
      ../common/hipblas_parse_data.cpp
      ../common/hipblas_datatype2string.cpp
      ../common/norm.cpp
      ../common/unit.cpp
      ../common/near.cpp
      ../common/arg_check.cpp
      ../common/argument_model.cpp
      ../common/hipblas_template_specialization.cpp
      ../common/host_alloc.cpp
      ${BLIS_CPP}
    )

add_executable( hipblas-bench ${hipblas_bench_source} ${hipblas_benchmark_common} )
add_executable( hipblas_v2-bench ${hipblas_bench_source} ${hipblas_benchmark_common} )

target_compile_features( hipblas-bench PRIVATE cxx_static_assert cxx_nullptr cxx_auto_type )
target_compile_features( hipblas_v2-bench PRIVATE cxx_static_assert cxx_nullptr cxx_auto_type )

# Internal header includes
target_include_directories( hipblas-bench PRIVATE $ )
target_include_directories( hipblas_v2-bench PRIVATE $ )

# External header includes included as system files
target_include_directories( hipblas-bench SYSTEM PRIVATE $ $ $ $ $ )
target_include_directories( hipblas_v2-bench SYSTEM PRIVATE $ $ $ $ $ )

target_link_libraries( hipblas-bench PRIVATE roc::hipblas )
target_link_libraries( hipblas_v2-bench PRIVATE roc::hipblas )

# The Fortran client helper library is linked on non-Windows builds only
if (NOT WIN32)
  target_link_libraries( hipblas-bench PRIVATE hipblas_fortran_client )
  target_link_libraries( hipblas_v2-bench PRIVATE hipblas_fortran_client )
endif()

# need mf16c flag for float->half conversion
target_compile_options( hipblas-bench PRIVATE -mf16c ) # -Wno-deprecated-declarations )
target_compile_options( hipblas_v2-bench PRIVATE -mf16c ) # -Wno-deprecated-declarations )

target_compile_options(hipblas-bench PRIVATE $<$:${COMMON_CXX_OPTIONS}>)
target_compile_options(hipblas_v2-bench PRIVATE $<$:${COMMON_CXX_OPTIONS}>)

target_compile_definitions( hipblas-bench PRIVATE HIPBLAS_BENCH ${COMMON_DEFINES} ${BLIS_DEFINES} )
target_compile_definitions( hipblas_v2-bench PRIVATE HIPBLAS_BENCH ${COMMON_DEFINES} ${BLIS_DEFINES} HIPBLAS_V2 )

target_link_libraries( hipblas-bench PRIVATE ${BLAS_LIBRARY} ${COMMON_LINK_LIBS} )
target_link_libraries( hipblas_v2-bench PRIVATE ${BLAS_LIBRARY} ${COMMON_LINK_LIBS} )

if (NOT WIN32)
  target_link_libraries( hipblas-bench PRIVATE stdc++fs )
  target_link_libraries( hipblas_v2-bench PRIVATE stdc++fs )
endif()

# Platform runtime: HIP host libraries on the amd platform, CUDA libraries otherwise
if(HIP_PLATFORM STREQUAL amd)
  target_link_libraries( hipblas-bench PRIVATE hip::host )
  target_link_libraries( hipblas_v2-bench PRIVATE hip::host )

  if( CUSTOM_TARGET )
    target_link_libraries( hipblas-bench PRIVATE hip::${CUSTOM_TARGET} )
    target_link_libraries( hipblas_v2-bench PRIVATE hip::${CUSTOM_TARGET} )
  endif()
else( )
  target_include_directories( hipblas-bench PRIVATE $ )
  target_include_directories( hipblas_v2-bench PRIVATE $ )

  target_link_libraries( hipblas-bench PRIVATE ${CUDA_LIBRARIES} )
  target_link_libraries( hipblas_v2-bench PRIVATE ${CUDA_LIBRARIES} )
endif( )

# Place the executables in the staging directory
set_target_properties( hipblas-bench PROPERTIES CXX_EXTENSIONS OFF RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
set_target_properties( hipblas_v2-bench PROPERTIES CXX_EXTENSIONS OFF RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )

add_dependencies( hipblas-bench hipblas-common )
add_dependencies( hipblas_v2-bench hipblas-common )

rocm_install(TARGETS hipblas-bench COMPONENT benchmarks)
rocm_install(TARGETS hipblas_v2-bench COMPONENT benchmarks)
hipBLAS-rocm-6.4.3/clients/benchmarks/client.cpp000066400000000000000000000412221500474223100214430ustar00rootroot00000000000000
/* ************************************************************************
 * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "program_options.hpp" #include "hipblas.hpp" #include "argument_model.hpp" #include "clients_common.hpp" #include "hipblas_data.hpp" #include "hipblas_datatype2string.hpp" #include "hipblas_parse_data.hpp" #include "hipblas_test.hpp" #include "test_cleanup.hpp" #include "type_dispatch.hpp" #include "utility.h" #include #include #include #include #include #include #include #include #include #include using namespace roc; // For emulated program_options int hipblas_bench_datafile() { int ret = 0; for(Arguments arg : HipBLAS_TestData()) ret |= run_bench_test(arg, 0, 1); test_cleanup::cleanup(); return ret; } void thread_init_device(int id, const Arguments& arg) { int count; CHECK_HIP_ERROR(hipGetDeviceCount(&count)); if(id < count) CHECK_HIP_ERROR(hipSetDevice(id)); Arguments a(arg); a.cold_iters = 1; a.iters = 0; run_bench_test(a, 0, 1); } void thread_run_bench(int id, const Arguments& arg) { int count; CHECK_HIP_ERROR(hipGetDeviceCount(&count)); if(id < count) CHECK_HIP_ERROR(hipSetDevice(id)); Arguments a(arg); run_bench_test(a, 0, 1); } int run_bench_multi_gpu_test(int parallel_devices, Arguments& arg) { int count; CHECK_HIP_ERROR(hipGetDeviceCount(&count)); if(parallel_devices > count || parallel_devices < 1) return 1; // initialization auto thread_init = std::make_unique(parallel_devices); for(int id = 0; id < parallel_devices; ++id) thread_init[id] = std::thread(::thread_init_device, id, arg); for(int id = 0; id < parallel_devices; ++id) thread_init[id].join(); // synchronzied launch of cold & hot calls auto thread = std::make_unique(parallel_devices); for(int id = 0; id < parallel_devices; ++id) thread[id] = std::thread(::thread_run_bench, id, arg); for(int id = 0; id < parallel_devices; ++id) thread[id].join(); return 0; } // Replace --batch with --batch_count for backward compatibility void fix_batch(int argc, char* argv[]) { static char b_c[] = "--batch_count"; for(int i 
= 1; i < argc; ++i) if(!strcmp(argv[i], "--batch")) { static int once = (std::cerr << argv[0] << " warning: --batch is deprecated, and --batch_count " "should be used instead." << std::endl, 0); argv[i] = b_c; } } int main(int argc, char* argv[]) try { fix_batch(argc, argv); Arguments arg; std::string function; std::string precision; std::string a_type; std::string b_type; std::string c_type; std::string d_type; std::string compute_type; std::string compute_type_gemm; std::string initialization; int device_id; int parallel_devices; int32_t api = 0; bool fortran = false; bool datafile = hipblas_parse_data(argc, argv); bool atomics_not_allowed = false; bool log_function_name = false; bool log_datatype = false; options_description desc("hipblas-bench command line options"); // clang-format off desc.add_options() ("sizem,m", value(&arg.M)->default_value(128), "Specific matrix size: sizem is only applicable to BLAS-2 & BLAS-3: the number of " "rows or columns in matrix.") ("sizen,n", value(&arg.N)->default_value(128), "Specific matrix/vector size: BLAS-1: the length of the vector. BLAS-2 & " "BLAS-3: the number of rows or columns in matrix") ("sizek,k", value(&arg.K)->default_value(128), "Specific matrix size: BLAS-2: the number of sub or super-diagonals of A. 
BLAS-3: " "the number of columns in A and rows in B.") ("kl", value(&arg.KL)->default_value(128), "Specific matrix size: kl is only applicable to BLAS-2: The number of sub-diagonals " "of the banded matrix A.") ("ku", value(&arg.KU)->default_value(128), "Specific matrix size: ku is only applicable to BLAS-2: The number of super-diagonals " "of the banded matrix A.") ("lda", value(&arg.lda)->default_value(128), "Leading dimension of matrix A, is only applicable to BLAS-2 & BLAS-3.") ("ldb", value(&arg.ldb)->default_value(128), "Leading dimension of matrix B, is only applicable to BLAS-2 & BLAS-3.") ("ldc", value(&arg.ldc)->default_value(128), "Leading dimension of matrix C, is only applicable to BLAS-2 & BLAS-3.") ("ldd", value(&arg.ldd)->default_value(128), "Leading dimension of matrix D, is only applicable to BLAS-EX ") ("stride_a", value(&arg.stride_a)->default_value(128*128), "Specific stride of strided_batched matrix A, is only applicable to strided batched" "BLAS-2 and BLAS-3: second dimension * leading dimension.") ("stride_b", value(&arg.stride_b)->default_value(128*128), "Specific stride of strided_batched matrix B, is only applicable to strided batched" "BLAS-2 and BLAS-3: second dimension * leading dimension.") ("stride_c", value(&arg.stride_c)->default_value(128*128), "Specific stride of strided_batched matrix C, is only applicable to strided batched" "BLAS-2 and BLAS-3: second dimension * leading dimension.") ("stride_d", value(&arg.stride_d)->default_value(128*128), "Specific stride of strided_batched matrix D, is only applicable to strided batched" "BLAS_EX: second dimension * leading dimension.") ("stride_x", value(&arg.stride_x)->default_value(128), "Specific stride of strided_batched vector x, is only applicable to strided batched" "BLAS_2: second dimension.") ("stride_y", value(&arg.stride_y)->default_value(128), "Specific stride of strided_batched vector y, is only applicable to strided batched" "BLAS_2: leading dimension.") ("incx", 
value(&arg.incx)->default_value(1), "increment between values in x vector") ("incy", value(&arg.incy)->default_value(1), "increment between values in y vector") ("alpha", value(&arg.alpha)->default_value(1.0), "specifies the scalar alpha") ("alphai", value(&arg.alphai)->default_value(0.0), "specifies the imaginary part of the scalar alpha") ("beta", value(&arg.beta)->default_value(0.0), "specifies the scalar beta") ("betai", value(&arg.betai)->default_value(0.0), "specifies the imaginary part of the scalar beta") ("function,f", value(&function), "BLAS function to test.") ("precision,r", value(&precision)->default_value("f32_r"), "Precision. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("a_type", value(&a_type), "Precision of matrix A. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("b_type", value(&b_type), "Precision of matrix B. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("c_type", value(&c_type), "Precision of matrix C. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("d_type", value(&d_type), "Precision of matrix D. " "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("compute_type", value(&compute_type), "Precision of computation. See compute_type_gemm for gemm_ex" "Options: h,s,d,c,z,f16_r,f32_r,f64_r,bf16_r,f32_c,f64_c,i8_r,i32_r") ("compute_type_gemm", value(&compute_type_gemm), "Precision of computation for gemm_ex with HIPBLAS_V2 define" "Options: c16f,c16f_pedantic,c32f,c32f_pedantic,c32f_fast_16f,c32f_fast_16bf,c32f_fast_tf32,c64f,c64f_pedantic,c32i,c32i_pedantic") ("initialization", value(&initialization)->default_value("hpl"), "Intialize with random integers, trig functions sin and cos, or hpl-like input. 
" "Options: rand_int, trig_float, hpl") ("transposeA", value(&arg.transA)->default_value('N'), "N = no transpose, T = transpose, C = conjugate transpose") ("transposeB", value(&arg.transB)->default_value('N'), "N = no transpose, T = transpose, C = conjugate transpose") ("side", value(&arg.side)->default_value('L'), "L = left, R = right. Only applicable to certain routines") ("uplo", value(&arg.uplo)->default_value('U'), "U = upper, L = lower. Only applicable to certain routines") // xsymv xsyrk xsyr2k xtrsm xtrsm_ex // xtrmm xtrsv ("diag", value(&arg.diag)->default_value('N'), "U = unit diagonal, N = non unit diagonal. Only applicable to certain routines") // xtrsm xtrsm_ex xtrsv xtrmm ("batch_count", value(&arg.batch_count)->default_value(1), "Number of matrices. Only applicable to batched and strided_batched routines") ("inplace", value(&arg.inplace)->default_value(false), "Whether or not to use the in place version of the algorithm. Only applicable to trmm routines") ("verify,v", value(&arg.norm_check)->default_value(0), "Validate GPU results with CPU? 
0 = No, 1 = Yes (default: No)") ("iters,i", value(&arg.iters)->default_value(10), "Iterations to run inside timing loop") ("cold_iters,j", value(&arg.cold_iters)->default_value(2), "Cold Iterations to run before entering the timing loop") ("algo", value(&arg.algo)->default_value(0), "extended precision gemm algorithm") ("solution_index", value(&arg.solution_index)->default_value(0), "extended precision gemm solution index") ("flags", value(&arg.flags)->default_value(0), "gemm_ex flags") ("atomics_not_allowed", bool_switch(&atomics_not_allowed)->default_value(false), "Atomic operations with non-determinism in results are not allowed") ("device", value(&device_id)->default_value(0), "Set default device to be used for subsequent program runs") ("parallel_devices", value(¶llel_devices)->default_value(0), "Set number of devices used for parallel runs (device 0 to parallel_devices-1)") // ("c_noalias_d", // bool_switch(&arg.c_noalias_d)->default_value(false), // "C and D are stored in separate memory") ("log_function_name", bool_switch(&log_function_name)->default_value(false), "Function name precedes other itmes.") ("log_datatype", bool_switch(&log_datatype)->default_value(false), "Include datatypes used in output.") ("fortran", bool_switch(&fortran)->default_value(false), "Run using Fortran interface") ("api", value(&api)->default_value(0), "Use API, supercedes fortran flag (0==C, 1==C_64, ...)") ("help,h", "produces this help message"); //("version", "Prints the version number"); // clang-format on variables_map vm; store(parse_command_line(argc, argv, desc), vm); notify(vm); if((argc <= 1 && !datafile) || vm.count("help")) { std::cout << desc << std::endl; return 0; } // if(vm.find("version") != vm.end()) // { // char blas_version[100]; // hipblas_get_version_string(blas_version, sizeof(blas_version)); // std::cout << "hipBLAS version: " << blas_version << std::endl; // return 0; // } // transfer local variable state arg.atomics_mode = atomics_not_allowed ? 
HIPBLAS_ATOMICS_NOT_ALLOWED : HIPBLAS_ATOMICS_ALLOWED; if(api) arg.api = hipblas_client_api(api); else if(fortran) arg.api = FORTRAN; ArgumentModel_set_log_function_name(log_function_name); ArgumentModel_set_log_datatype(log_datatype); // Device Query int device_count = query_device_property(); std::cout << std::endl; if(device_count <= device_id) throw std::invalid_argument("Invalid Device ID"); set_device(device_id); if(datafile) return hipblas_bench_datafile(); std::transform(precision.begin(), precision.end(), precision.begin(), ::tolower); auto prec = string2hipblas_datatype(precision); if(prec == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --precision " + precision); arg.a_type = a_type == "" ? prec : string2hipblas_datatype(a_type); if(arg.a_type == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --a_type " + a_type); arg.b_type = b_type == "" ? prec : string2hipblas_datatype(b_type); if(arg.b_type == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --b_type " + b_type); arg.c_type = c_type == "" ? prec : string2hipblas_datatype(c_type); if(arg.c_type == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --c_type " + c_type); arg.d_type = d_type == "" ? prec : string2hipblas_datatype(d_type); if(arg.d_type == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --d_type " + d_type); arg.compute_type = compute_type == "" ? 
prec : string2hipblas_datatype(compute_type); if(arg.compute_type == HIPBLAS_DATATYPE_INVALID) throw std::invalid_argument("Invalid value for --compute_type " + compute_type); arg.compute_type_gemm = string2hipblas_computetype(compute_type_gemm); arg.initialization = string2hipblas_initialization(initialization); if(arg.initialization == static_cast(0)) // invalid enum throw std::invalid_argument("Invalid value for --initialization " + initialization); if(arg.M < 0) throw std::invalid_argument("Invalid value for -m " + std::to_string(arg.M)); if(arg.N < 0) throw std::invalid_argument("Invalid value for -n " + std::to_string(arg.N)); if(arg.K < 0) throw std::invalid_argument("Invalid value for -k " + std::to_string(arg.K)); int copied = snprintf(arg.function, sizeof(arg.function), "%s", function.c_str()); if(copied <= 0 || copied >= sizeof(arg.function)) throw std::invalid_argument("Invalid value for --function"); if(!parallel_devices) return run_bench_test(arg, 0, 1); else return run_bench_multi_gpu_test(parallel_devices, arg); } catch(const std::invalid_argument& exp) { std::cerr << exp.what() << std::endl; return -1; } hipBLAS-rocm-6.4.3/clients/cmake/000077500000000000000000000000001500474223100164235ustar00rootroot00000000000000hipBLAS-rocm-6.4.3/clients/cmake/build-options.cmake000066400000000000000000000022321500474223100222140ustar00rootroot00000000000000# ######################################################################## # Copyright (C) 2016-2024 Advanced Micro Devices, Inc. # ######################################################################## # This file is intended to be used in two ways; independently in a stand alone PROJECT # and as part of a superbuild. If the file is included in a stand alone project, the # variables are not expected to be preset, and this will produce options() in the GUI # for the user to examine. 
# If this file is included in a superbuild, the options will be
# presented in the superbuild GUI, but then passed into the ExternalProject as -D
# parameters, which would already define them.

# Declares the boolean option <name> (default OFF) with the given help text, unless
# <name> already evaluates to true, in which case the existing value is left alone.
macro( hipblas_client_option name description )
  if( NOT ${name} )
    option( ${name} "${description}" OFF )
  endif( )
endmacro( )

hipblas_client_option( BUILD_CLIENTS_TESTS "Build hipBLAS unit tests" )
hipblas_client_option( BUILD_CLIENTS_BENCHMARKS "Build hipBLAS benchmarks" )
hipblas_client_option( BUILD_CLIENTS_SAMPLES "Build hipBLAS samples" )

# The AOCL BLIS reference library is linked by default everywhere except on the
# nvidia HIP platform.
if( HIP_PLATFORM STREQUAL nvidia )
  set( _hipblas_link_blis_default OFF )
else()
  set( _hipblas_link_blis_default ON )
endif()
option( LINK_BLIS "Link AOCL Blis reference library" ${_hipblas_link_blis_default} )
hipBLAS-rocm-6.4.3/clients/common/000077500000000000000000000000001500474223100166335ustar00rootroot00000000000000
hipBLAS-rocm-6.4.3/clients/common/arg_check.cpp000066400000000000000000000031571500474223100212530ustar00rootroot00000000000000
/* ************************************************************************
 * Copyright (C) 2016-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "arg_check.h" #include "hipblas.h" #include void verify_hipblas_status_invalid_value(hipblasStatus_t status, const char* message) { #ifdef GOOGLE_TEST ASSERT_EQ(status, HIPBLAS_STATUS_INVALID_VALUE); #endif if(status != HIPBLAS_STATUS_INVALID_VALUE) { std::cout << message << std::endl; } } hipBLAS-rocm-6.4.3/clients/common/argument_model.cpp000066400000000000000000000034571500474223100223520ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * ************************************************************************ */ #include "argument_model.hpp" // this should have been a member variable but due to the complex variadic template this singleton allows global control static bool log_function_name = false; void ArgumentModel_set_log_function_name(bool f) { log_function_name = f; } bool ArgumentModel_get_log_function_name() { return log_function_name; } static bool log_datatype = false; void ArgumentModel_set_log_datatype(bool d) { log_datatype = d; } bool ArgumentModel_get_log_datatype() { return log_datatype; } hipBLAS-rocm-6.4.3/clients/common/blis_interface.cpp000066400000000000000000000026361500474223100223170ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include void setup_blis() { #ifndef WIN32 bli_init(); #endif } static int initialize_blis = (setup_blis(), 0); hipBLAS-rocm-6.4.3/clients/common/cblas_interface.cpp000066400000000000000000004327061500474223100224570ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * * ************************************************************************/ #if !defined(WIN32) && defined(FLA_ENABLE_ILP64) #include #endif #include "cblas_interface.h" #include "hipblas.h" #include "lapack_utilities.hpp" #include "utility.h" #include #include #include /*!\file * \brief provide template functions interfaces to CBLAS C89 interfaces, it is only used for testing * not part of the GPU library */ #ifndef FLA_ENABLE_ILP64 #ifdef __cplusplus extern "C" { #endif void spotrf_(char* uplo, int64_t* m, float* A, int64_t* lda, int64_t* info); void dpotrf_(char* uplo, int64_t* m, double* A, int64_t* lda, int64_t* info); void cpotrf_(char* uplo, int64_t* m, hipblasComplex* A, int64_t* lda, int64_t* info); void zpotrf_(char* uplo, int64_t* m, hipblasDoubleComplex* A, int64_t* lda, int64_t* info); void sgetrf_(int64_t* m, int64_t* n, float* A, int64_t* lda, int64_t* ipiv, int64_t* info); void dgetrf_(int64_t* m, int64_t* n, double* A, int64_t* lda, int64_t* ipiv, int64_t* info); void cgetrf_(int64_t* m, int64_t* n, hipblasComplex* A, int64_t* lda, int64_t* ipiv, int64_t* info); void zgetrf_( int64_t* m, int64_t* n, hipblasDoubleComplex* A, int64_t* lda, int64_t* ipiv, int64_t* info); void sgetrs_(char* trans, int64_t* n, int64_t* nrhs, float* A, int64_t* lda, int64_t* ipiv, float* B, int64_t* ldb, int64_t* info); void dgetrs_(char* trans, int64_t* n, int64_t* nrhs, double* A, int64_t* lda, int64_t* ipiv, double* B, int64_t* ldb, int64_t* info); void cgetrs_(char* trans, int64_t* n, int64_t* nrhs, hipblasComplex* A, int64_t* lda, int64_t* ipiv, hipblasComplex* B, int64_t* ldb, int64_t* info); void zgetrs_(char* trans, int64_t* n, int64_t* nrhs, hipblasDoubleComplex* A, int64_t* lda, int64_t* ipiv, hipblasDoubleComplex* B, int64_t* ldb, int64_t* info); void sgetri_( int64_t* n, float* A, int64_t* lda, int64_t* ipiv, float* work, int64_t* lwork, int64_t* info); void dgetri_(int64_t* n, double* A, int64_t* lda, int64_t* ipiv, double* work, int64_t* lwork, int64_t* 
info); void cgetri_(int64_t* n, hipblasComplex* A, int64_t* lda, int64_t* ipiv, hipblasComplex* work, int64_t* lwork, int64_t* info); void zgetri_(int64_t* n, hipblasDoubleComplex* A, int64_t* lda, int64_t* ipiv, hipblasDoubleComplex* work, int64_t* lwork, int64_t* info); void sgeqrf_(int64_t* m, int64_t* n, float* A, int64_t* lda, float* tau, float* work, int64_t* lwork, int64_t* info); void dgeqrf_(int64_t* m, int64_t* n, double* A, int64_t* lda, double* tau, double* work, int64_t* lwork, int64_t* info); void cgeqrf_(int64_t* m, int64_t* n, hipblasComplex* A, int64_t* lda, hipblasComplex* tau, hipblasComplex* work, int64_t* lwork, int64_t* info); void zgeqrf_(int64_t* m, int64_t* n, hipblasDoubleComplex* A, int64_t* lda, hipblasDoubleComplex* tau, hipblasDoubleComplex* work, int64_t* lwork, int64_t* info); void sgels_(char* trans, int64_t* m, int64_t* n, int64_t* nrhs, float* A, int64_t* lda, float* B, int64_t* ldb, float* work, int64_t* lwork, int64_t* info); void dgels_(char* trans, int64_t* m, int64_t* n, int64_t* nrhs, double* A, int64_t* lda, double* B, int64_t* ldb, double* work, int64_t* lwork, int64_t* info); void cgels_(char* trans, int64_t* m, int64_t* n, int64_t* nrhs, hipblasComplex* A, int64_t* lda, hipblasComplex* B, int64_t* ldb, hipblasComplex* work, int64_t* lwork, int64_t* info); void zgels_(char* trans, int64_t* m, int64_t* n, int64_t* nrhs, hipblasDoubleComplex* A, int64_t* lda, hipblasDoubleComplex* B, int64_t* ldb, hipblasDoubleComplex* work, int64_t* lwork, int64_t* info); /* void strtri_(char* uplo, char* diag, int64_t* n, float* A, int64_t* lda, int64_t* info); void dtrtri_(char* uplo, char* diag, int64_t* n, double* A, int64_t* lda, int64_t* info); void ctrtri_(char* uplo, char* diag, int64_t* n, hipblasComplex* A, int64_t* lda, int64_t* info); void ztrtri_(char* uplo, char* diag, int64_t* n, hipblasDoubleComplex* A, int64_t* lda, int64_t* info); void cspr_( char* uplo, int64_t* n, hipblasComplex* alpha, hipblasComplex* x, int64_t* incx, 
hipblasComplex* A); void zspr_(char* uplo, int64_t* n, hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int64_t* incx, hipblasDoubleComplex* A); void csyr_(char* uplo, int64_t* n, hipblasComplex* alpha, hipblasComplex* x, int64_t* incx, hipblasComplex* a, int64_t* lda); void zsyr_(char* uplo, int64_t* n, hipblasDoubleComplex* alpha, hipblasDoubleComplex* x, int64_t* incx, hipblasDoubleComplex* a, int64_t* lda); void csymv_(char* uplo, int64_t* n, hipblasComplex* alpha, hipblasComplex* A, int64_t* lda, hipblasComplex* x, int64_t* incx, hipblasComplex* beta, hipblasComplex* y, int64_t* incy); void zsymv_(char* uplo, int64_t* n, hipblasDoubleComplex* alpha, hipblasDoubleComplex* A, int64_t* lda, hipblasDoubleComplex* x, int64_t* incx, hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t* incy); */ #ifdef __cplusplus } #endif #endif /* * =========================================================================== * level 1 BLAS * =========================================================================== */ // axpy template <> void ref_axpy(int64_t n, const hipblasBfloat16 alpha, const hipblasBfloat16* x, int64_t incx, hipblasBfloat16* y, int64_t incy) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? incy : -incy; std::vector x_float(n * abs_incx); std::vector y_float(n * abs_incy); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = bfloat16_to_float(x[i * abs_incx]); y_float[i * abs_incy] = bfloat16_to_float(y[i * abs_incy]); } cblas_saxpy(n, bfloat16_to_float(alpha), x_float.data(), incx, y_float.data(), incy); for(size_t i = 0; i < n; i++) { y[i * abs_incy] = float_to_bfloat16(y_float[i * abs_incy]); } } template <> void ref_axpy(int64_t n, const float alpha, const hipblasBfloat16* x, int64_t incx, hipblasBfloat16* y, int64_t incy) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? 
incy : -incy; std::vector x_float(n * abs_incx); std::vector y_float(n * abs_incy); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = bfloat16_to_float(x[i * abs_incx]); y_float[i * abs_incy] = bfloat16_to_float(y[i * abs_incy]); } cblas_saxpy(n, alpha, x_float.data(), incx, y_float.data(), incy); for(size_t i = 0; i < n; i++) { y[i * abs_incy] = float_to_bfloat16(y_float[i * abs_incy]); } } template <> void ref_axpy(int64_t n, const hipblasHalf alpha, const hipblasHalf* x, int64_t incx, hipblasHalf* y, int64_t incy) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? incy : -incy; std::vector x_float(n * abs_incx); std::vector y_float(n * abs_incy); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = half_to_float(x[i * abs_incx]); y_float[i * abs_incy] = half_to_float(y[i * abs_incy]); } cblas_saxpy(n, half_to_float(alpha), x_float.data(), incx, y_float.data(), incy); for(size_t i = 0; i < n; i++) { y[i * abs_incy] = float_to_half(y_float[i * abs_incy]); } } template <> void ref_axpy( int64_t n, const float alpha, const hipblasHalf* x, int64_t incx, hipblasHalf* y, int64_t incy) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? 
incy : -incy; std::vector x_float(n * abs_incx); std::vector y_float(n * abs_incy); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = half_to_float(x[i * abs_incx]); y_float[i * abs_incy] = half_to_float(y[i * abs_incy]); } cblas_saxpy(n, alpha, x_float.data(), incx, y_float.data(), incy); for(size_t i = 0; i < n; i++) { y[i * abs_incy] = float_to_half(y_float[i * abs_incy]); } } template <> void ref_axpy( int64_t n, const float alpha, const float* x, int64_t incx, float* y, int64_t incy) { cblas_saxpy(n, alpha, x, incx, y, incy); } template <> void ref_axpy( int64_t n, const double alpha, const double* x, int64_t incx, double* y, int64_t incy) { cblas_daxpy(n, alpha, x, incx, y, incy); } template <> void ref_axpy(int64_t n, const hipblasComplex alpha, const hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy) { cblas_caxpy(n, &alpha, x, incx, y, incy); } template <> void ref_axpy(int64_t n, const hipblasDoubleComplex alpha, const hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy) { cblas_zaxpy(n, &alpha, x, incx, y, incy); } // asum template <> void ref_asum(int64_t n, const float* x, int64_t incx, float* result) { if(n <= 0 || incx <= 0) return; float sum = 0; // using partial sums to reduce rounding errors for 64-bit n int64_t block_size = 1024 * 512; int64_t blocks = (n - 1) / block_size + 1; for(int64_t b = 0; b < blocks; b++) { float partial_sum = 0; for(int64_t i = 0; i < block_size; i++) { int64_t idx = i + b * block_size; if(idx < n) partial_sum += std::abs(x[idx * incx]); } sum += partial_sum; } *result = sum; } // scal template <> void ref_scal(int64_t n, const hipblasHalf alpha, hipblasHalf* x, int64_t incx) { if(n <= 0 || incx <= 0) return; std::vector x_float(n * incx); for(size_t i = 0; i < n; i++) x_float[i * incx] = half_to_float(x[i * incx]); cblas_sscal(n, half_to_float(alpha), x_float.data(), incx); for(size_t i = 0; i < n; i++) x[i * incx] = float_to_half(x_float[i * incx]); } template <> void 
ref_scal(int64_t n, const hipblasBfloat16 alpha, hipblasBfloat16* x, int64_t incx) { if(n <= 0 || incx <= 0) return; std::vector x_float(n * incx); for(size_t i = 0; i < n; i++) x_float[i * incx] = bfloat16_to_float(x[i * incx]); cblas_sscal(n, bfloat16_to_float(alpha), x_float.data(), incx); for(size_t i = 0; i < n; i++) x[i * incx] = float_to_bfloat16(x_float[i * incx]); } template <> void ref_scal(int64_t n, const float alpha, hipblasHalf* x, int64_t incx) { if(n <= 0 || incx <= 0) return; std::vector x_float(n * incx); for(size_t i = 0; i < n; i++) x_float[i * incx] = half_to_float(x[i * incx]); cblas_sscal(n, alpha, x_float.data(), incx); for(size_t i = 0; i < n; i++) x[i * incx] = float_to_half(x_float[i * incx]); } template <> void ref_scal(int64_t n, const float alpha, hipblasBfloat16* x, int64_t incx) { if(n <= 0 || incx <= 0) return; std::vector x_float(n * incx); for(size_t i = 0; i < n; i++) x_float[i * incx] = bfloat16_to_float(x[i * incx]); cblas_sscal(n, alpha, x_float.data(), incx); for(size_t i = 0; i < n; i++) x[i * incx] = float_to_bfloat16(x_float[i * incx]); } template <> void ref_scal(int64_t n, const float alpha, float* x, int64_t incx) { cblas_sscal(n, alpha, x, incx); } template <> void ref_scal(int64_t n, const double alpha, double* x, int64_t incx) { cblas_dscal(n, alpha, x, incx); } template <> void ref_scal(int64_t n, const hipblasComplex alpha, hipblasComplex* x, int64_t incx) { cblas_cscal(n, &alpha, x, incx); } template <> void ref_scal(int64_t n, const float alpha, hipblasComplex* x, int64_t incx) { cblas_csscal(n, alpha, x, incx); } template <> void ref_scal(int64_t n, const hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int64_t incx) { cblas_zscal(n, &alpha, x, incx); } template <> void ref_scal(int64_t n, const double alpha, hipblasDoubleComplex* x, int64_t incx) { cblas_zdscal(n, alpha, x, incx); } // copy template <> void ref_copy(int64_t n, float* x, int64_t incx, float* y, int64_t incy) { cblas_scopy(n, x, incx, y, 
incy); } template <> void ref_copy(int64_t n, double* x, int64_t incx, double* y, int64_t incy) { cblas_dcopy(n, x, incx, y, incy); } template <> void ref_copy( int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy) { cblas_ccopy(n, x, incx, y, incy); } template <> void ref_copy( int64_t n, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy) { cblas_zcopy(n, x, incx, y, incy); } // swap template <> void ref_swap(int64_t n, float* x, int64_t incx, float* y, int64_t incy) { cblas_sswap(n, x, incx, y, incy); } template <> void ref_swap(int64_t n, double* x, int64_t incx, double* y, int64_t incy) { cblas_dswap(n, x, incx, y, incy); } template <> void ref_swap( int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy) { cblas_cswap(n, x, incx, y, incy); } template <> void ref_swap( int64_t n, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy) { cblas_zswap(n, x, incx, y, incy); } // dot template <> void ref_dot(int64_t n, const hipblasHalf* x, int64_t incx, const hipblasHalf* y, int64_t incy, hipblasHalf* result) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? incy : -incy; std::vector x_float(n * abs_incx); std::vector y_float(n * abs_incy); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = half_to_float(x[i * abs_incx]); y_float[i * abs_incy] = half_to_float(y[i * abs_incy]); } *result = float_to_half(cblas_sdot(n, x_float.data(), incx, y_float.data(), incy)); } template <> void ref_dot(int64_t n, const hipblasBfloat16* x, int64_t incx, const hipblasBfloat16* y, int64_t incy, hipblasBfloat16* result) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? 
incy : -incy; std::vector x_float(n * abs_incx); std::vector y_float(n * abs_incy); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = bfloat16_to_float(x[i * abs_incx]); y_float[i * abs_incy] = bfloat16_to_float(y[i * abs_incy]); } *result = float_to_bfloat16(cblas_sdot(n, x_float.data(), incx, y_float.data(), incy)); } template <> void ref_dot( int64_t n, const float* x, int64_t incx, const float* y, int64_t incy, float* result) { *result = cblas_sdot(n, x, incx, y, incy); } template <> void ref_dot( int64_t n, const double* x, int64_t incx, const double* y, int64_t incy, double* result) { *result = cblas_ddot(n, x, incx, y, incy); } template <> void ref_dot(int64_t n, const hipblasComplex* x, int64_t incx, const hipblasComplex* y, int64_t incy, hipblasComplex* result) { cblas_cdotu_sub(n, x, incx, y, incy, result); } template <> void ref_dot(int64_t n, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* result) { cblas_zdotu_sub(n, x, incx, y, incy, result); } template <> void ref_dotc(int64_t n, const hipblasHalf* x, int64_t incx, const hipblasHalf* y, int64_t incy, hipblasHalf* result) { // Not complex - call regular dot. ref_dot(n, x, incx, y, incy, result); } template <> void ref_dotc(int64_t n, const hipblasBfloat16* x, int64_t incx, const hipblasBfloat16* y, int64_t incy, hipblasBfloat16* result) { // Not complex - call regular dot. ref_dot(n, x, incx, y, incy, result); } template <> void ref_dotc( int64_t n, const float* x, int64_t incx, const float* y, int64_t incy, float* result) { // Not complex - call regular dot. ref_dot(n, x, incx, y, incy, result); } template <> void ref_dotc( int64_t n, const double* x, int64_t incx, const double* y, int64_t incy, double* result) { // Not complex - call regular dot. 
ref_dot(n, x, incx, y, incy, result); } template <> void ref_dotc(int64_t n, const hipblasComplex* x, int64_t incx, const hipblasComplex* y, int64_t incy, hipblasComplex* result) { cblas_cdotc_sub(n, x, incx, y, incy, result); } template <> void ref_dotc(int64_t n, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* result) { cblas_zdotc_sub(n, x, incx, y, incy, result); } // nrm2 template <> void ref_nrm2(int64_t n, const hipblasHalf* x, int64_t incx, hipblasHalf* result) { if(n <= 0 || incx <= 0) return; std::vector x_float(n * incx); for(size_t i = 0; i < n; i++) x_float[i * incx] = half_to_float(x[i * incx]); *result = float_to_half(cblas_snrm2(n, x_float.data(), incx)); } template <> void ref_nrm2(int64_t n, const hipblasBfloat16* x, int64_t incx, hipblasBfloat16* result) { if(n <= 0 || incx <= 0) return; std::vector x_float(n * incx); for(size_t i = 0; i < n; i++) x_float[i * incx] = bfloat16_to_float(x[i * incx]); *result = float_to_bfloat16(cblas_snrm2(n, x_float.data(), incx)); } template <> void ref_nrm2(int64_t n, const float* x, int64_t incx, float* result) { *result = cblas_snrm2(n, x, incx); } template <> void ref_nrm2(int64_t n, const double* x, int64_t incx, double* result) { *result = cblas_dnrm2(n, x, incx); } template <> void ref_nrm2(int64_t n, const hipblasComplex* x, int64_t incx, float* result) { *result = cblas_scnrm2(n, x, incx); } template <> void ref_nrm2(int64_t n, const hipblasDoubleComplex* x, int64_t incx, double* result) { *result = cblas_dznrm2(n, x, incx); } /////////////////// // rot functions // /////////////////// // LAPACK fortran library functionality extern "C" { void crot_(const int64_t* n, hipblasComplex* cx, const int64_t* incx, hipblasComplex* cy, const int64_t* incy, const float* c, const hipblasComplex* s); void csrot_(const int64_t* n, hipblasComplex* cx, const int64_t* incx, hipblasComplex* cy, const int64_t* incy, const float* c, const float* s); void 
zrot_(const int64_t* n, hipblasDoubleComplex* cx, const int64_t* incx, hipblasDoubleComplex* cy, const int64_t* incy, const double* c, const hipblasDoubleComplex* s); void zdrot_(const int64_t* n, hipblasDoubleComplex* cx, const int64_t* incx, hipblasDoubleComplex* cy, const int64_t* incy, const double* c, const double* s); void crotg_(hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s); void zrotg_(hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c, hipblasDoubleComplex* s); } // rot template <> void ref_rot(int64_t n, hipblasHalf* x, int64_t incx, hipblasHalf* y, int64_t incy, hipblasHalf c, hipblasHalf s) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? incy : -incy; size_t size_x = n * abs_incx; size_t size_y = n * abs_incy; if(!size_x) size_x = 1; if(!size_y) size_y = 1; std::vector x_float(size_x); std::vector y_float(size_y); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = half_to_float(x[i * abs_incx]); y_float[i * abs_incy] = half_to_float(y[i * abs_incy]); } const float c_float = half_to_float(c); const float s_float = half_to_float(s); cblas_srot(n, x_float.data(), incx, y_float.data(), incy, c_float, s_float); for(size_t i = 0; i < n; i++) { x[i * abs_incx] = float_to_half(x_float[i * abs_incx]); y[i * abs_incy] = float_to_half(y_float[i * abs_incy]); } } template <> void ref_rot(int64_t n, hipblasBfloat16* x, int64_t incx, hipblasBfloat16* y, int64_t incy, hipblasBfloat16 c, hipblasBfloat16 s) { size_t abs_incx = incx >= 0 ? incx : -incx; size_t abs_incy = incy >= 0 ? 
incy : -incy; size_t size_x = n * abs_incx; size_t size_y = n * abs_incy; if(!size_x) size_x = 1; if(!size_y) size_y = 1; std::vector x_float(size_x); std::vector y_float(size_y); for(size_t i = 0; i < n; i++) { x_float[i * abs_incx] = bfloat16_to_float(x[i * abs_incx]); y_float[i * abs_incy] = bfloat16_to_float(y[i * abs_incy]); } const float c_float = bfloat16_to_float(c); const float s_float = bfloat16_to_float(s); cblas_srot(n, x_float.data(), incx, y_float.data(), incy, c_float, s_float); for(size_t i = 0; i < n; i++) { x[i * abs_incx] = float_to_bfloat16(x_float[i * abs_incx]); y[i * abs_incy] = float_to_bfloat16(y_float[i * abs_incy]); } } template <> void ref_rot(int64_t n, float* x, int64_t incx, float* y, int64_t incy, float c, float s) { cblas_srot(n, x, incx, y, incy, c, s); } template <> void ref_rot( int64_t n, double* x, int64_t incx, double* y, int64_t incy, double c, double s) { cblas_drot(n, x, incx, y, incy, c, s); } template <> void ref_rot(int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy, hipblasComplex c, hipblasComplex s) { float c_real = std::real(c); lapack_xrot(n, x, incx, y, incy, c_real, s); } template <> void ref_rot(int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy, float c, hipblasComplex s) { lapack_xrot(n, x, incx, y, incy, c, s); } template <> void ref_rot( int64_t n, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy, float c, float s) { lapack_xrot(n, x, incx, y, incy, c, s); } template <> void ref_rot(int64_t n, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex c, hipblasDoubleComplex s) { double c_real = std::real(c); lapack_xrot(n, x, incx, y, incy, c_real, s); } template <> void ref_rot(int64_t n, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy, double c, hipblasDoubleComplex s) { lapack_xrot(n, x, incx, y, incy, c, s); } template <> void ref_rot(int64_t n, hipblasDoubleComplex* x, 
int64_t incx, hipblasDoubleComplex* y, int64_t incy, double c, double s) { lapack_xrot(n, x, incx, y, incy, c, s); } // rotg template <> void ref_rotg(float* a, float* b, float* c, float* s) { cblas_srotg(a, b, c, s); } template <> void ref_rotg(double* a, double* b, double* c, double* s) { cblas_drotg(a, b, c, s); } template <> void ref_rotg(hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s) { lapack_xrotg(*a, *b, *c, *s); } template <> void ref_rotg(hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c, hipblasDoubleComplex* s) { lapack_xrotg(*a, *b, *c, *s); } // asum /* template <> void ref_asum(int64_t n, const float* x, int64_t incx, float* result) { *result = ref_sasum(n, x, incx); } template <> void ref_asum(int64_t n, const double* x, int64_t incx, double* result) { *result = ref_dasum(n, x, incx); } template <> void ref_asum(int64_t n, const hipblasComplex* x, int64_t incx, float* result) { *result = ref_scasum(n, x, incx); } template <> void ref_asum(int64_t n, const hipblasDoubleComplex* x, int64_t incx, double* result) { *result = ref_dzasum(n, x, incx); } */ // amax /* local versions of amax and amin for minimum index in case of ties. See hipblas_iamax_imin_fef.hpp template <> void ref_iamax(int64_t n, const float* x, int64_t incx, int64_t* result) { *result = (int64_t)cblas_isamax(n, x, incx); } template <> void ref_iamax(int64_t n, const double* x, int64_t incx, int64_t* result) { *result = (int64_t)cblas_idamax(n, x, incx); } template <> void ref_iamax(int64_t n, const hipblasComplex* x, int64_t incx, int64_t* result) { *result = (int64_t)cblas_icamax(n, x, incx); } template <> void ref_iamax(int64_t n, const hipblasDoubleComplex* x, int64_t incx, int64_t* result) { *result = (int64_t)cblas_izamax(n, x, incx); } // amin // amin is not implemented in cblas, make local version template double hipblas_magnitude(T val) { return val < 0 ? 
-val : val; } template <> double hipblas_magnitude(hipblasComplex val) { return std::abs(val.real()) + std::abs(val.imag()); } template <> double hipblas_magnitude(hipblasDoubleComplex val) { return std::abs(val.real()) + std::abs(val.imag()); } template int64_t ref_iamin_helper(int64_t N, const T* X, int64_t incx) { int64_t minpos = -1; if(N > 0 && incx > 0) { auto min = hipblas_magnitude(X[0]); minpos = 0; for(size_t i = 1; i < N; ++i) { auto a = hipblas_magnitude(X[i * incx]); if(a < min) { min = a; minpos = i; } } } return minpos; } template <> void ref_iamin(int64_t n, const float* x, int64_t incx, int64_t* result) { *result = (int64_t)ref_iamin_helper(n, x, incx); } template <> void ref_iamin(int64_t n, const double* x, int64_t incx, int64_t* result) { *result = (int64_t)ref_iamin_helper(n, x, incx); } template <> void ref_iamin(int64_t n, const hipblasComplex* x, int64_t incx, int64_t* result) { *result = (int64_t)ref_iamin_helper(n, x, incx); } template <> void ref_iamin(int64_t n, const hipblasDoubleComplex* x, int64_t incx, int64_t* result) { *result = (int64_t)ref_iamin_helper(n, x, incx); } */ /* * =========================================================================== * level 2 BLAS * =========================================================================== */ // gbmv template <> void ref_gbmv(hipblasOperation_t transA, int64_t m, int64_t n, int64_t kl, int64_t ku, float alpha, float* A, int64_t lda, float* x, int64_t incx, float beta, float* y, int64_t incy) { cblas_sgbmv(CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } template <> void ref_gbmv(hipblasOperation_t transA, int64_t m, int64_t n, int64_t kl, int64_t ku, double alpha, double* A, int64_t lda, double* x, int64_t incx, double beta, double* y, int64_t incy) { cblas_dgbmv(CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); } template <> void ref_gbmv(hipblasOperation_t transA, int64_t m, int64_t n, 
int64_t kl, int64_t ku, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex* x, int64_t incx, hipblasComplex beta, hipblasComplex* y, int64_t incy) { cblas_cgbmv(CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, kl, ku, &alpha, A, lda, x, incx, &beta, y, incy); } template <> void ref_gbmv(hipblasOperation_t transA, int64_t m, int64_t n, int64_t kl, int64_t ku, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int64_t incy) { cblas_zgbmv(CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, kl, ku, &alpha, A, lda, x, incx, &beta, y, incy); } // gemv template <> void ref_gemv(hipblasOperation_t transA, int64_t m, int64_t n, float alpha, float* A, int64_t lda, float* x, int64_t incx, float beta, float* y, int64_t incy) { cblas_sgemv( CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } template <> void ref_gemv(hipblasOperation_t transA, int64_t m, int64_t n, double alpha, double* A, int64_t lda, double* x, int64_t incx, double beta, double* y, int64_t incy) { cblas_dgemv( CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, alpha, A, lda, x, incx, beta, y, incy); } template <> void ref_gemv(hipblasOperation_t transA, int64_t m, int64_t n, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex* x, int64_t incx, hipblasComplex beta, hipblasComplex* y, int64_t incy) { cblas_cgemv( CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, &alpha, A, lda, x, incx, &beta, y, incy); } template <> void ref_gemv(hipblasOperation_t transA, int64_t m, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int64_t incy) { cblas_zgemv( CblasColMajor, (CBLAS_TRANSPOSE)transA, m, n, &alpha, A, lda, x, incx, &beta, y, incy); } // ger template <> void ref_ger(int64_t m, int64_t n, float alpha, float* x, int64_t incx, float* y, 
int64_t incy, float* A, int64_t lda) { cblas_sger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda); } template <> void ref_ger(int64_t m, int64_t n, double alpha, double* x, int64_t incx, double* y, int64_t incy, double* A, int64_t lda) { cblas_dger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda); } template <> void ref_ger(int64_t m, int64_t n, hipblasComplex alpha, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy, hipblasComplex* A, int64_t lda) { cblas_cgeru(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); } template <> void ref_ger(int64_t m, int64_t n, hipblasComplex alpha, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy, hipblasComplex* A, int64_t lda) { cblas_cgerc(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); } template <> void ref_ger(int64_t m, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* A, int64_t lda) { cblas_zgeru(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); } template <> void ref_ger(int64_t m, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* A, int64_t lda) { cblas_zgerc(CblasColMajor, m, n, &alpha, x, incx, y, incy, A, lda); } // hbmv template <> void ref_hbmv(hipblasFillMode_t uplo, int64_t n, int64_t k, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex* x, int64_t incx, hipblasComplex beta, hipblasComplex* y, int64_t incy) { cblas_chbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, &alpha, A, lda, x, incx, &beta, y, incy); } template <> void ref_hbmv(hipblasFillMode_t uplo, int64_t n, int64_t k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int64_t incy) { cblas_zhbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, &alpha, A, lda, x, incx, &beta, y, incy); } // hemv template <> void 
ref_hemv(hipblasFillMode_t uplo, int64_t n, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex* x, int64_t incx, hipblasComplex beta, hipblasComplex* y, int64_t incy) { cblas_chemv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, A, lda, x, incx, &beta, y, incy); } template <> void ref_hemv(hipblasFillMode_t uplo, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int64_t incy) { cblas_zhemv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, A, lda, x, incx, &beta, y, incy); } // her template <> void ref_her(hipblasFillMode_t uplo, int64_t n, float alpha, hipblasComplex* x, int64_t incx, hipblasComplex* A, int64_t lda) { cblas_cher(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda); } template <> void ref_her(hipblasFillMode_t uplo, int64_t n, double alpha, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* A, int64_t lda) { cblas_zher(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda); } // her2 template <> void ref_her2(hipblasFillMode_t uplo, int64_t n, hipblasComplex alpha, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy, hipblasComplex* A, int64_t lda) { cblas_cher2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, A, lda); } template <> void ref_her2(hipblasFillMode_t uplo, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* A, int64_t lda) { cblas_zher2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, A, lda); } // hpmv template <> void ref_hpmv(hipblasFillMode_t uplo, int64_t n, hipblasComplex alpha, hipblasComplex* AP, hipblasComplex* x, int64_t incx, hipblasComplex beta, hipblasComplex* y, int64_t incy) { cblas_chpmv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, AP, x, incx, &beta, y, incy); } template <> void ref_hpmv(hipblasFillMode_t uplo, int64_t n, hipblasDoubleComplex alpha, 
hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int64_t incy) { cblas_zhpmv(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, AP, x, incx, &beta, y, incy); } // hpr template <> void ref_hpr(hipblasFillMode_t uplo, int64_t n, float alpha, hipblasComplex* x, int64_t incx, hipblasComplex* AP) { cblas_chpr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP); } template <> void ref_hpr(hipblasFillMode_t uplo, int64_t n, double alpha, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* AP) { cblas_zhpr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP); } // hpr2 template <> void ref_hpr2(hipblasFillMode_t uplo, int64_t n, hipblasComplex alpha, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy, hipblasComplex* AP) { cblas_chpr2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, AP); } template <> void ref_hpr2(hipblasFillMode_t uplo, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* AP) { cblas_zhpr2(CblasColMajor, (CBLAS_UPLO)uplo, n, &alpha, x, incx, y, incy, AP); } // sbmv template <> void ref_sbmv(hipblasFillMode_t uplo, int64_t n, int64_t k, float alpha, float* A, int64_t lda, float* x, int64_t incx, float beta, float* y, int64_t incy) { cblas_ssbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } template <> void ref_sbmv(hipblasFillMode_t uplo, int64_t n, int64_t k, double alpha, double* A, int64_t lda, double* x, int64_t incx, double beta, double* y, int64_t incy) { cblas_dsbmv(CblasColMajor, (CBLAS_UPLO)uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); } // spmv template <> void ref_spmv(hipblasFillMode_t uplo, int64_t n, float alpha, float* AP, float* x, int64_t incx, float beta, float* y, int64_t incy) { cblas_sspmv(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, AP, x, incx, beta, y, incy); } template <> void ref_spmv(hipblasFillMode_t 
uplo, int64_t n, double alpha, double* AP, double* x, int64_t incx, double beta, double* y, int64_t incy) { cblas_dspmv(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, AP, x, incx, beta, y, incy); } // spr template <> void ref_spr(hipblasFillMode_t uplo, int64_t n, float alpha, float* x, int64_t incx, float* AP) { cblas_sspr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP); } template <> void ref_spr(hipblasFillMode_t uplo, int64_t n, double alpha, double* x, int64_t incx, double* AP) { cblas_dspr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, AP); } template <> void ref_spr(hipblasFillMode_t uplo, int64_t n, hipblasComplex alpha, hipblasComplex* x, int64_t incx, hipblasComplex* AP) { lapack_xspr(uplo, n, alpha, x, incx, AP); } template <> void ref_spr(hipblasFillMode_t uplo, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* AP) { lapack_xspr(uplo, n, alpha, x, incx, AP); } // spr2 template <> void ref_spr2(hipblasFillMode_t uplo, int64_t n, float alpha, float* x, int64_t incx, float* y, int64_t incy, float* AP) { cblas_sspr2(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, y, incy, AP); } template <> void ref_spr2(hipblasFillMode_t uplo, int64_t n, double alpha, double* x, int64_t incx, double* y, int64_t incy, double* AP) { cblas_dspr2(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, y, incy, AP); } // symv template <> void ref_symv(hipblasFillMode_t uplo, int64_t n, float alpha, float* A, int64_t lda, float* x, int64_t incx, float beta, float* y, int64_t incy) { cblas_ssymv(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> void ref_symv(hipblasFillMode_t uplo, int64_t n, double alpha, double* A, int64_t lda, double* x, int64_t incx, double beta, double* y, int64_t incy) { cblas_dsymv(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> void ref_symv(hipblasFillMode_t uplo, int64_t n, hipblasComplex alpha, hipblasComplex* A, 
int64_t lda, hipblasComplex* x, int64_t incx, hipblasComplex beta, hipblasComplex* y, int64_t incy) { lapack_xsymv(uplo, n, alpha, A, lda, x, incx, beta, y, incy); } template <> void ref_symv(hipblasFillMode_t uplo, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex beta, hipblasDoubleComplex* y, int64_t incy) { lapack_xsymv(uplo, n, alpha, A, lda, x, incx, beta, y, incy); } // syr template <> void ref_syr( hipblasFillMode_t uplo, int64_t n, float alpha, float* x, int64_t incx, float* A, int64_t lda) { cblas_ssyr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda); } template <> void ref_syr(hipblasFillMode_t uplo, int64_t n, double alpha, double* x, int64_t incx, double* A, int64_t lda) { cblas_dsyr(CblasColMajor, (CBLAS_UPLO)uplo, n, alpha, x, incx, A, lda); } template <> void ref_syr(hipblasFillMode_t uplo, int64_t n, hipblasComplex alpha, hipblasComplex* xa, int64_t incx, hipblasComplex* A, int64_t lda) { lapack_xsyr(uplo, n, alpha, xa, incx, A, lda); } template <> void ref_syr(hipblasFillMode_t uplo, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* xa, int64_t incx, hipblasDoubleComplex* A, int64_t lda) { lapack_xsyr(uplo, n, alpha, xa, incx, A, lda); } // syr2 template <> void ref_syr2(hipblasFillMode_t uplo, int64_t n, float alpha, float* x, int64_t incx, float* y, int64_t incy, float* A, int64_t lda) { cblas_ssyr2(CblasColMajor, CBLAS_UPLO(uplo), n, alpha, x, incx, y, incy, A, lda); } template <> void ref_syr2(hipblasFillMode_t uplo, int64_t n, double alpha, double* x, int64_t incx, double* y, int64_t incy, double* A, int64_t lda) { cblas_dsyr2(CblasColMajor, CBLAS_UPLO(uplo), n, alpha, x, incx, y, incy, A, lda); } template <> void ref_syr2(hipblasFillMode_t uplo, int64_t n, hipblasComplex alpha, hipblasComplex* x, int64_t incx, hipblasComplex* y, int64_t incy, hipblasComplex* A, int64_t lda) { lapack_xsyr2(uplo, n, alpha, x, incx, y, incy, A, lda); } 
template <> void ref_syr2(hipblasFillMode_t uplo, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* A, int64_t lda) { lapack_xsyr2(uplo, n, alpha, x, incx, y, incy, A, lda); } // tbmv template <> void ref_tbmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k, const float* A, int64_t lda, float* x, int64_t incx) { cblas_stbmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void ref_tbmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k, const double* A, int64_t lda, double* x, int64_t incx) { cblas_dtbmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void ref_tbmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k, const hipblasComplex* A, int64_t lda, hipblasComplex* x, int64_t incx) { cblas_ctbmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void ref_tbmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k, const hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* x, int64_t incx) { cblas_ztbmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } // tbsv template <> void ref_tbsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k, const float* A, int64_t lda, float* x, int64_t incx) { cblas_stbsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void ref_tbsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k, const double* A, int64_t lda, double* x, int64_t incx) { cblas_dtbsv(CblasColMajor, 
CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void ref_tbsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k, const hipblasComplex* A, int64_t lda, hipblasComplex* x, int64_t incx) { cblas_ctbsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } template <> void ref_tbsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k, const hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* x, int64_t incx) { cblas_ztbsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, k, A, lda, x, incx); } // tpmv template <> void ref_tpmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const float* A, float* x, int64_t incx) { cblas_stpmv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, x, incx); } template <> void ref_tpmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const double* A, double* x, int64_t incx) { cblas_dtpmv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, x, incx); } template <> void ref_tpmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasComplex* A, hipblasComplex* x, int64_t incx) { cblas_ctpmv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, x, incx); } template <> void ref_tpmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasDoubleComplex* A, hipblasDoubleComplex* x, int64_t incx) { cblas_ztpmv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, x, incx); } // tpsv template <> void ref_tpsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const float* AP, float* x, int64_t incx) { cblas_stpsv( 
CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), n, AP, x, incx); } template <> void ref_tpsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const double* AP, double* x, int64_t incx) { cblas_dtpsv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), n, AP, x, incx); } template <> void ref_tpsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const hipblasComplex* AP, hipblasComplex* x, int64_t incx) { cblas_ctpsv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), n, AP, x, incx); } template <> void ref_tpsv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int64_t incx) { cblas_ztpsv( CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), n, AP, x, incx); } // trmv template <> void ref_trmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const float* A, int64_t lda, float* x, int64_t incx) { cblas_strmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void ref_trmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const double* A, int64_t lda, double* x, int64_t incx) { cblas_dtrmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void ref_trmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasComplex* A, int64_t lda, hipblasComplex* x, int64_t incx) { cblas_ctrmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void ref_trmv(hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* x, int64_t incx) { 
cblas_ztrmv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } // trsv template <> void ref_trsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const float* A, int64_t lda, float* x, int64_t incx) { cblas_strsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void ref_trsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const double* A, int64_t lda, double* x, int64_t incx) { cblas_dtrsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void ref_trsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasComplex* A, int64_t lda, hipblasComplex* x, int64_t incx) { cblas_ctrsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } template <> void ref_trsv(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* x, int64_t incx) { cblas_ztrsv(CblasColMajor, CBLAS_UPLO(uplo), CBLAS_TRANSPOSE(transA), CBLAS_DIAG(diag), m, A, lda, x, incx); } /* * =========================================================================== * level 3 BLAS * =========================================================================== */ template void ref_geam_helper(hipblasOperation_t transA, hipblasOperation_t transB, int64_t M, int64_t N, T alpha, T* A, int64_t lda, T beta, T* B, int64_t ldb, T* C, int64_t ldc) { int64_t inc1_A = transA == HIPBLAS_OP_N ? 1 : lda; int64_t inc2_A = transA == HIPBLAS_OP_N ? lda : 1; int64_t inc1_B = transB == HIPBLAS_OP_N ? 1 : ldb; int64_t inc2_B = transB == HIPBLAS_OP_N ? 
ldb : 1; for(int64_t i = 0; i < M; i++) { for(int64_t j = 0; j < N; j++) { T a_val = A[i * inc1_A + j * inc2_A]; T b_val = B[i * inc1_B + j * inc2_B]; if(transA == HIPBLAS_OP_C) a_val = std::conj(a_val); if(transB == HIPBLAS_OP_C) b_val = std::conj(b_val); C[i + j * ldc] = alpha * a_val + beta * b_val; } } } template void ref_dgmm_helper(hipblasSideMode_t side, int64_t M, int64_t N, const T* A, int64_t lda, const T* x, int64_t incx, T* C, int64_t ldc) { ptrdiff_t shift_x = incx < 0 ? -ptrdiff_t(incx) * (N - 1) : 0; for(size_t i1 = 0; i1 < M; i1++) { for(size_t i2 = 0; i2 < N; i2++) { if(HIPBLAS_SIDE_RIGHT == side) { C[i1 + i2 * ldc] = A[i1 + i2 * lda] * x[shift_x + i2 * incx]; } else { C[i1 + i2 * ldc] = A[i1 + i2 * lda] * x[shift_x + i1 * incx]; } } } } // dgmm template <> void ref_dgmm(hipblasSideMode_t side, int64_t M, int64_t N, const float* A, int64_t lda, const float* x, int64_t incx, float* C, int64_t ldc) { ref_dgmm_helper(side, M, N, A, lda, x, incx, C, ldc); } template <> void ref_dgmm(hipblasSideMode_t side, int64_t M, int64_t N, const double* A, int64_t lda, const double* x, int64_t incx, double* C, int64_t ldc) { ref_dgmm_helper(side, M, N, A, lda, x, incx, C, ldc); } template <> void ref_dgmm(hipblasSideMode_t side, int64_t M, int64_t N, const hipblasComplex* A, int64_t lda, const hipblasComplex* x, int64_t incx, hipblasComplex* C, int64_t ldc) { ref_dgmm_helper(side, M, N, A, lda, x, incx, C, ldc); } template <> void ref_dgmm(hipblasSideMode_t side, int64_t M, int64_t N, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* C, int64_t ldc) { ref_dgmm_helper(side, M, N, A, lda, x, incx, C, ldc); } // geam template <> void ref_geam(hipblasOperation_t transa, hipblasOperation_t transb, int64_t m, int64_t n, float* alpha, float* A, int64_t lda, float* beta, float* B, int64_t ldb, float* C, int64_t ldc) { return ref_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc); } template 
<> void ref_geam(hipblasOperation_t transa, hipblasOperation_t transb, int64_t m, int64_t n, double* alpha, double* A, int64_t lda, double* beta, double* B, int64_t ldb, double* C, int64_t ldc) { return ref_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc); } template <> void ref_geam(hipblasOperation_t transa, hipblasOperation_t transb, int64_t m, int64_t n, hipblasComplex* alpha, hipblasComplex* A, int64_t lda, hipblasComplex* beta, hipblasComplex* B, int64_t ldb, hipblasComplex* C, int64_t ldc) { return ref_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc); } template <> void ref_geam(hipblasOperation_t transa, hipblasOperation_t transb, int64_t m, int64_t n, hipblasDoubleComplex* alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* beta, hipblasDoubleComplex* B, int64_t ldb, hipblasDoubleComplex* C, int64_t ldc) { return ref_geam_helper(transa, transb, m, n, *alpha, A, lda, *beta, B, ldb, C, ldc); } // gemm template <> void ref_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, hipblasHalf alpha, hipblasHalf* A, int64_t lda, hipblasHalf* B, int64_t ldb, hipblasHalf beta, hipblasHalf* C, int64_t ldc) { // cblas does not support hipblasHalf, so convert to higher precision float // This will give more precise result which is acceptable for testing float alpha_float = half_to_float(alpha); float beta_float = half_to_float(beta); size_t sizeA = transA == HIPBLAS_OP_N ? size_t(k) * lda : size_t(m) * lda; size_t sizeB = transB == HIPBLAS_OP_N ? 
size_t(n) * ldb : size_t(k) * ldb; size_t sizeC = size_t(n) * ldc; std::unique_ptr A_float(new float[sizeA]()); std::unique_ptr B_float(new float[sizeB]()); std::unique_ptr C_float(new float[sizeC]()); for(size_t i = 0; i < sizeA; i++) { A_float[i] = half_to_float(A[i]); } for(size_t i = 0; i < sizeB; i++) { B_float[i] = half_to_float(B[i]); } for(size_t i = 0; i < sizeC; i++) { C_float[i] = half_to_float(C[i]); } // just directly cast, since transA, transB are integers in the enum // printf("transA: rocblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha_float, const_cast(A_float.get()), lda, const_cast(B_float.get()), ldb, beta_float, static_cast(C_float.get()), ldc); for(size_t i = 0; i < sizeC; i++) { C[i] = float_to_half(C_float[i]); } } template <> void ref_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, float alpha, hipblasHalf* A, int64_t lda, hipblasHalf* B, int64_t ldb, float beta, hipblasHalf* C, int64_t ldc) { // cblas does not support hipblasHalf, so convert to higher precision float // This will give more precise result which is acceptable for testing size_t sizeA = transA == HIPBLAS_OP_N ? size_t(k) * lda : size_t(m) * lda; size_t sizeB = transB == HIPBLAS_OP_N ? 
size_t(n) * ldb : size_t(k) * ldb; size_t sizeC = n * ldc; std::unique_ptr A_float(new float[sizeA]()); std::unique_ptr B_float(new float[sizeB]()); std::unique_ptr C_float(new float[sizeC]()); for(size_t i = 0; i < sizeA; i++) { A_float[i] = half_to_float(A[i]); } for(size_t i = 0; i < sizeB; i++) { B_float[i] = half_to_float(B[i]); } for(size_t i = 0; i < sizeC; i++) { C_float[i] = half_to_float(C[i]); } // just directly cast, since transA, transB are integers in the enum // printf("transA: rocblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha, const_cast(A_float.get()), lda, const_cast(B_float.get()), ldb, beta, static_cast(C_float.get()), ldc); for(size_t i = 0; i < sizeC; i++) { C[i] = float_to_half(C_float[i]); } } template <> void ref_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, float alpha, hipblasHalf* A, int64_t lda, hipblasHalf* B, int64_t ldb, float beta, float* C, int64_t ldc) { // cblas does not support hipblasHalf, so convert to higher precision float // This will give more precise result which is acceptable for testing size_t sizeA = transA == HIPBLAS_OP_N ? size_t(k) * lda : size_t(m) * lda; size_t sizeB = transB == HIPBLAS_OP_N ? 
size_t(n) * ldb : size_t(k) * ldb; std::unique_ptr A_float(new float[sizeA]()); std::unique_ptr B_float(new float[sizeB]()); for(size_t i = 0; i < sizeA; i++) { A_float[i] = half_to_float(A[i]); } for(size_t i = 0; i < sizeB; i++) { B_float[i] = half_to_float(B[i]); } // just directly cast, since transA, transB are integers in the enum // printf("transA: rocblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha, const_cast(A_float.get()), lda, const_cast(B_float.get()), ldb, beta, C, ldc); } template <> void ref_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, float alpha, hipblasBfloat16* A, int64_t lda, hipblasBfloat16* B, int64_t ldb, float beta, hipblasBfloat16* C, int64_t ldc) { // cblas does not support hipblasBfloat16, so convert to higher precision float // This will give more precise result which is acceptable for testing size_t sizeA = transA == HIPBLAS_OP_N ? size_t(k) * lda : size_t(m) * lda; size_t sizeB = transB == HIPBLAS_OP_N ? 
size_t(n) * ldb : size_t(k) * ldb; size_t sizeC = size_t(n) * ldc; std::unique_ptr A_float(new float[sizeA]()); std::unique_ptr B_float(new float[sizeB]()); std::unique_ptr C_float(new float[sizeC]()); for(size_t i = 0; i < sizeA; i++) { A_float[i] = bfloat16_to_float(A[i]); } for(size_t i = 0; i < sizeB; i++) { B_float[i] = bfloat16_to_float(B[i]); } for(size_t i = 0; i < sizeC; i++) { C_float[i] = bfloat16_to_float(C[i]); } // just directly cast, since transA, transB are integers in the enum // printf("transA: rocblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha, const_cast(A_float.get()), lda, const_cast(B_float.get()), ldb, beta, static_cast(C_float.get()), ldc); for(size_t i = 0; i < sizeC; i++) { C[i] = float_to_bfloat16(C_float[i]); } } template <> void ref_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, float alpha, hipblasBfloat16* A, int64_t lda, hipblasBfloat16* B, int64_t ldb, float beta, float* C, int64_t ldc) { // cblas does not support hipblasBfloat16, so convert to higher precision float // This will give more precise result which is acceptable for testing size_t sizeA = transA == HIPBLAS_OP_N ? size_t(k) * lda : size_t(m) * lda; size_t sizeB = transB == HIPBLAS_OP_N ? 
size_t(n) * ldb : size_t(k) * ldb; std::unique_ptr A_float(new float[sizeA]()); std::unique_ptr B_float(new float[sizeB]()); for(size_t i = 0; i < sizeA; i++) { A_float[i] = bfloat16_to_float(A[i]); } for(size_t i = 0; i < sizeB; i++) { B_float[i] = bfloat16_to_float(B[i]); } // just directly cast, since transA, transB are integers in the enum // printf("transA: rocblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha, const_cast(A_float.get()), lda, const_cast(B_float.get()), ldb, beta, C, ldc); } template <> void ref_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, float alpha, float* A, int64_t lda, float* B, int64_t ldb, float beta, float* C, int64_t ldc) { // just directly cast, since transA, transB are integers in the enum // printf("transA: hipblas =%d, cblas=%d\n", transA, (CBLAS_TRANSPOSE)transA ); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void ref_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, double alpha, double* A, int64_t lda, double* B, int64_t ldb, double beta, double* C, int64_t ldc) { cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void ref_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex* B, int64_t ldb, hipblasComplex beta, hipblasComplex* C, int64_t ldc) { //just directly cast, since transA, transB are integers in the enum cblas_cgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void ref_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, 
hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* B, int64_t ldb, hipblasDoubleComplex beta, hipblasDoubleComplex* C, int64_t ldc) { cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void ref_gemm(hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, int32_t alpha, int8_t* A, int64_t lda, int8_t* B, int64_t ldb, int32_t beta, int32_t* C, int64_t ldc) { double alpha_double = static_cast(alpha); double beta_double = static_cast(beta); size_t const sizeA = ((transA == HIPBLAS_OP_N) ? k : m) * size_t(lda); size_t const sizeB = ((transB == HIPBLAS_OP_N) ? n : k) * size_t(ldb); size_t const sizeC = n * size_t(ldc); std::unique_ptr A_double(new double[sizeA]()); std::unique_ptr B_double(new double[sizeB]()); std::unique_ptr C_double(new double[sizeC]()); for(int64_t i = 0; i < sizeA; i++) { A_double[i] = static_cast(A[i]); } for(int64_t i = 0; i < sizeB; i++) { B_double[i] = static_cast(B[i]); } for(int64_t i = 0; i < sizeC; i++) { C_double[i] = static_cast(C[i]); } cblas_dgemm(CblasColMajor, static_cast(transA), static_cast(transB), m, n, k, alpha_double, const_cast(A_double.get()), lda, const_cast(B_double.get()), ldb, beta_double, static_cast(C_double.get()), ldc); for(size_t i = 0; i < sizeC; i++) C[i] = static_cast(C_double[i]); } // hemm template <> void ref_hemm(hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex* B, int64_t ldb, hipblasComplex beta, hipblasComplex* C, int64_t ldc) { cblas_chemm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void ref_hemm(hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* B, int64_t ldb, hipblasDoubleComplex beta, 
hipblasDoubleComplex* C, int64_t ldc) { cblas_zhemm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, &alpha, A, lda, B, ldb, &beta, C, ldc); } // herk template <> void ref_herk(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, float alpha, hipblasComplex* A, int64_t lda, float beta, hipblasComplex* C, int64_t ldc) { cblas_cherk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, beta, C, ldc); } template <> void ref_herk(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, double alpha, hipblasDoubleComplex* A, int64_t lda, double beta, hipblasDoubleComplex* C, int64_t ldc) { cblas_zherk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, beta, C, ldc); } // herkx template void ref_herkx_local(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, T alpha, T* A, int64_t lda, T* B, int64_t ldb, U beta, T* C, int64_t ldc) { if(n <= 0 || (beta == 1 && (k == 0 || alpha == T(0.0)))) return; if(transA == HIPBLAS_OP_N) { if(uplo == HIPBLAS_FILL_MODE_UPPER) { for(int64_t j = 0; j < n; ++j) { for(int64_t i = 0; i <= j; i++) C[i + j * ldc] *= T(beta); for(int64_t l = 0; l < k; l++) { T temp = alpha * std::conj(B[j + l * ldb]); for(int64_t i = 0; i <= j; ++i) C[i + j * ldc] += temp * A[i + l * lda]; } } } else // lower { for(int64_t j = 0; j < n; ++j) { for(int64_t i = j; i < n; i++) C[i + j * ldc] *= T(beta); for(int64_t l = 0; l < k; l++) { T temp = alpha * std::conj(B[j + l * ldb]); for(int64_t i = j; i < n; ++i) C[i + j * ldc] += temp * A[i + l * lda]; } } } } else // conjugate transpose { if(uplo == HIPBLAS_FILL_MODE_UPPER) { for(int64_t j = 0; j < n; ++j) for(int64_t i = 0; i <= j; i++) { C[i + j * ldc] *= T(beta); T temp(0); for(int64_t l = 0; l < k; l++) temp += std::conj(A[l + i * lda]) * B[l + j * ldb]; C[i + j * ldc] += alpha * temp; } } else // lower { for(int64_t j = 0; j < n; ++j) for(int64_t i = j; i < n; i++) { C[i + j * ldc] *= T(beta); T 
temp(0); for(int64_t l = 0; l < k; l++) temp += std::conj(A[l + i * lda]) * B[l + j * ldb]; C[i + j * ldc] += alpha * temp; } } } for(int64_t i = 0; i < n; i++) C[i + i * ldc].imag(0); } template <> void ref_herkx(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex* B, int64_t ldb, float beta, hipblasComplex* C, int64_t ldc) { ref_herkx_local(uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void ref_herkx(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* B, int64_t ldb, double beta, hipblasDoubleComplex* C, int64_t ldc) { ref_herkx_local(uplo, transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } // her2k template <> void ref_her2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex* B, int64_t ldb, float beta, hipblasComplex* C, int64_t ldc) { cblas_cher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, B, ldb, beta, C, ldc); } template <> void ref_her2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* B, int64_t ldb, double beta, hipblasDoubleComplex* C, int64_t ldc) { cblas_zher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, B, ldb, beta, C, ldc); } // symm template <> void ref_symm(hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, float alpha, float* A, int64_t lda, float* B, int64_t ldb, float beta, float* C, int64_t ldc) { cblas_ssymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void ref_symm(hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, double alpha, double* A, int64_t 
lda, double* B, int64_t ldb, double beta, double* C, int64_t ldc) { cblas_dsymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void ref_symm(hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex* B, int64_t ldb, hipblasComplex beta, hipblasComplex* C, int64_t ldc) { cblas_csymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void ref_symm(hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* B, int64_t ldb, hipblasDoubleComplex beta, hipblasDoubleComplex* C, int64_t ldc) { cblas_zsymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, m, n, &alpha, A, lda, B, ldb, &beta, C, ldc); } // syrk template <> void ref_syrk(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, float alpha, float* A, int64_t lda, float beta, float* C, int64_t ldc) { cblas_ssyrk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, beta, C, ldc); } template <> void ref_syrk(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, double alpha, double* A, int64_t lda, double beta, double* C, int64_t ldc) { cblas_dsyrk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, beta, C, ldc); } template <> void ref_syrk(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex beta, hipblasComplex* C, int64_t ldc) { cblas_csyrk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, &beta, C, ldc); } template <> void ref_syrk(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex beta, 
hipblasDoubleComplex* C, int64_t ldc) { cblas_zsyrk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, &beta, C, ldc); } // syr2k template <> void ref_syr2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, float alpha, float* A, int64_t lda, float* B, int64_t ldb, float beta, float* C, int64_t ldc) { cblas_ssyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void ref_syr2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, double alpha, double* A, int64_t lda, double* B, int64_t ldb, double beta, double* C, int64_t ldc) { cblas_dsyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> void ref_syr2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, hipblasComplex alpha, hipblasComplex* A, int64_t lda, hipblasComplex* B, int64_t ldb, hipblasComplex beta, hipblasComplex* C, int64_t ldc) { cblas_csyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } template <> void ref_syr2k(hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, hipblasDoubleComplex alpha, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* B, int64_t ldb, hipblasDoubleComplex beta, hipblasDoubleComplex* C, int64_t ldc) { cblas_zsyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } // syrkx // Use syrk with A == B for now. 
/* // trsm template <> void ref_trsm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, float alpha, const float* A, int64_t lda, float* B, int64_t ldb) { // just directly cast, since transA, transB are integers in the enum cblas_strsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, alpha, A, lda, B, ldb); } template <> void ref_trsm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, double alpha, const double* A, int64_t lda, double* B, int64_t ldb) { // just directly cast, since transA, transB are integers in the enum cblas_dtrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, alpha, A, lda, B, ldb); } template <> void ref_trsm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, hipblasComplex alpha, const hipblasComplex* A, int64_t lda, hipblasComplex* B, int64_t ldb) { cblas_ctrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, &alpha, A, lda, B, ldb); } template <> void ref_trsm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, hipblasDoubleComplex alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* B, int64_t ldb) { cblas_ztrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, &alpha, A, lda, B, ldb); } */ // trtri template <> void ref_trtri(char uplo, char diag, int64_t n, float* A, int64_t lda) { lapack_xtrtri(uplo, diag, n, A, lda); } template <> void ref_trtri(char uplo, char diag, int64_t n, double* A, int64_t lda) { lapack_xtrtri(uplo, diag, n, A, lda); } template <> void ref_trtri(char uplo, char diag, int64_t n, hipblasComplex* A, int64_t lda) { 
lapack_xtrtri(uplo, diag, n, A, lda); } template <> void ref_trtri( char uplo, char diag, int64_t n, hipblasDoubleComplex* A, int64_t lda) { lapack_xtrtri(uplo, diag, n, A, lda); } // trmm template <> void ref_trmm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, float alpha, const float* A, int64_t lda, float* B, int64_t ldb) { // just directly cast, since transA, transB are integers in the enum cblas_strmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, alpha, A, lda, B, ldb); } template <> void ref_trmm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, double alpha, const double* A, int64_t lda, double* B, int64_t ldb) { // just directly cast, since transA, transB are integers in the enum cblas_dtrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, alpha, A, lda, B, ldb); } template <> void ref_trmm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, hipblasComplex alpha, const hipblasComplex* A, int64_t lda, hipblasComplex* B, int64_t ldb) { cblas_ctrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, &alpha, A, lda, B, ldb); } template <> void ref_trmm(hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, hipblasDoubleComplex alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* B, int64_t ldb) { cblas_ztrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, &alpha, A, lda, B, ldb); } /* * =========================================================================== * LAPACK OR OTHER * =========================================================================== */ #ifdef 
__HIP_PLATFORM_SOLVER__ // potrf template <> int64_t ref_potrf(char uplo, int64_t m, float* A, int64_t lda) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_spotrf(LAPACK_COL_MAJOR, uplo, m, A, lda); #else spotrf_(&uplo, &m, A, &lda, &info); #endif return info; } template <> int64_t ref_potrf(char uplo, int64_t m, double* A, int64_t lda) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_dpotrf(LAPACK_COL_MAJOR, uplo, m, A, lda); #else dpotrf_(&uplo, &m, A, &lda, &info); #endif return info; } template <> int64_t ref_potrf(char uplo, int64_t m, hipblasComplex* A, int64_t lda) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_cpotrf(LAPACK_COL_MAJOR, uplo, m, (lapack_complex_float*)A, lda); #else cpotrf_(&uplo, &m, A, &lda, &info); #endif return info; } template <> int64_t ref_potrf(char uplo, int64_t m, hipblasDoubleComplex* A, int64_t lda) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_zpotrf(LAPACK_COL_MAJOR, uplo, m, (lapack_complex_double*)A, lda); #else zpotrf_(&uplo, &m, A, &lda, &info); #endif return info; } // getrf template <> int64_t ref_getrf(int64_t m, int64_t n, float* A, int64_t lda, int64_t* ipiv) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_sgetrf(LAPACK_COL_MAJOR, m, n, A, lda, ipiv); #else sgetrf_(&m, &n, A, &lda, ipiv, &info); #endif return info; } template <> int64_t ref_getrf(int64_t m, int64_t n, double* A, int64_t lda, int64_t* ipiv) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_dgetrf(LAPACK_COL_MAJOR, m, n, A, lda, ipiv); #else dgetrf_(&m, &n, A, &lda, ipiv, &info); #endif return info; } template <> int64_t ref_getrf(int64_t m, int64_t n, hipblasComplex* A, int64_t lda, int64_t* ipiv) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_cgetrf(LAPACK_COL_MAJOR, m, n, (lapack_complex_float*)A, lda, ipiv); #else cgetrf_(&m, &n, (hipblasComplex*)A, &lda, ipiv, &info); #endif return info; } template <> int64_t ref_getrf( int64_t m, int64_t n, hipblasDoubleComplex* A, int64_t lda, int64_t* ipiv) { 
int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_zgetrf(LAPACK_COL_MAJOR, m, n, (lapack_complex_double*)A, lda, ipiv); #else zgetrf_(&m, &n, (hipblasDoubleComplex*)A, &lda, ipiv, &info); #endif return info; } // getrs template <> int64_t ref_getrs(char trans, int64_t n, int64_t nrhs, float* A, int64_t lda, int64_t* ipiv, float* B, int64_t ldb) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_sgetrs(LAPACK_COL_MAJOR, trans, n, nrhs, A, lda, ipiv, B, ldb); #else sgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); #endif return info; } template <> int64_t ref_getrs(char trans, int64_t n, int64_t nrhs, double* A, int64_t lda, int64_t* ipiv, double* B, int64_t ldb) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_dgetrs(LAPACK_COL_MAJOR, trans, n, nrhs, A, lda, ipiv, B, ldb); #else dgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); #endif return info; } template <> int64_t ref_getrs(char trans, int64_t n, int64_t nrhs, hipblasComplex* A, int64_t lda, int64_t* ipiv, hipblasComplex* B, int64_t ldb) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_cgetrs(LAPACK_COL_MAJOR, trans, n, nrhs, (lapack_complex_float*)A, lda, ipiv, (lapack_complex_float*)B, ldb); #else cgetrs_(&trans, &n, &nrhs, (hipblasComplex*)A, &lda, ipiv, (hipblasComplex*)B, &ldb, &info); #endif return info; } template <> int64_t ref_getrs(char trans, int64_t n, int64_t nrhs, hipblasDoubleComplex* A, int64_t lda, int64_t* ipiv, hipblasDoubleComplex* B, int64_t ldb) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_zgetrs(LAPACK_COL_MAJOR, trans, n, nrhs, (lapack_complex_double*)A, lda, ipiv, (lapack_complex_double*)B, ldb); #else zgetrs_(&trans, &n, &nrhs, (hipblasDoubleComplex*)A, &lda, ipiv, (hipblasDoubleComplex*)B, &ldb, &info); #endif return info; } // getri template <> int64_t ref_getri(int64_t n, float* A, int64_t lda, int64_t* ipiv, float* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_sgetri_work(LAPACK_COL_MAJOR, n, A, lda, ipiv, 
work, lwork); #else sgetri_(&n, A, &lda, ipiv, work, &lwork, &info); #endif return info; } template <> int64_t ref_getri(int64_t n, double* A, int64_t lda, int64_t* ipiv, double* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_dgetri_work(LAPACK_COL_MAJOR, n, A, lda, ipiv, work, lwork); #else dgetri_(&n, A, &lda, ipiv, work, &lwork, &info); #endif return info; } template <> int64_t ref_getri( int64_t n, hipblasComplex* A, int64_t lda, int64_t* ipiv, hipblasComplex* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_cgetri_work(LAPACK_COL_MAJOR, n, (lapack_complex_float*)A, lda, ipiv, (lapack_complex_float*)work, lwork); #else cgetri_(&n, A, &lda, ipiv, work, &lwork, &info); #endif return info; } template <> int64_t ref_getri(int64_t n, hipblasDoubleComplex* A, int64_t lda, int64_t* ipiv, hipblasDoubleComplex* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_zgetri_work(LAPACK_COL_MAJOR, n, (lapack_complex_double*)A, lda, ipiv, (lapack_complex_double*)work, lwork); #else zgetri_(&n, A, &lda, ipiv, work, &lwork, &info); #endif return info; } // geqrf template <> int64_t ref_geqrf( int64_t m, int64_t n, float* A, int64_t lda, float* tau, float* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_sgeqrf_work(LAPACK_COL_MAJOR, m, n, A, lda, tau, work, lwork); #else sgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); #endif return info; } template <> int64_t ref_geqrf( int64_t m, int64_t n, double* A, int64_t lda, double* tau, double* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_dgeqrf_work(LAPACK_COL_MAJOR, m, n, A, lda, tau, work, lwork); #else dgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); #endif return info; } template <> int64_t ref_geqrf(int64_t m, int64_t n, hipblasComplex* A, int64_t lda, hipblasComplex* tau, hipblasComplex* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = 
LAPACKE_cgeqrf_work(LAPACK_COL_MAJOR, m, n, (lapack_complex_float*)A, lda, (lapack_complex_float*)tau, (lapack_complex_float*)work, lwork); #else cgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); #endif return info; } template <> int64_t ref_geqrf(int64_t m, int64_t n, hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* tau, hipblasDoubleComplex* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_zgeqrf_work(LAPACK_COL_MAJOR, m, n, (lapack_complex_double*)A, lda, (lapack_complex_double*)tau, (lapack_complex_double*)work, lwork); #else zgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info); #endif return info; } // gels template <> int64_t ref_gels(char trans, int64_t m, int64_t n, int64_t nrhs, float* A, int64_t lda, float* B, int64_t ldb, float* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_sgels_work(LAPACK_COL_MAJOR, trans, m, n, nrhs, A, lda, B, ldb, work, lwork); #else sgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info); #endif return info; } template <> int64_t ref_gels(char trans, int64_t m, int64_t n, int64_t nrhs, double* A, int64_t lda, double* B, int64_t ldb, double* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_dgels_work(LAPACK_COL_MAJOR, trans, m, n, nrhs, A, lda, B, ldb, work, lwork); #else dgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info); #endif return info; } template <> int64_t ref_gels(char trans, int64_t m, int64_t n, int64_t nrhs, hipblasComplex* A, int64_t lda, hipblasComplex* B, int64_t ldb, hipblasComplex* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_cgels_work(LAPACK_COL_MAJOR, trans, m, n, nrhs, (lapack_complex_float*)A, lda, (lapack_complex_float*)B, ldb, (lapack_complex_float*)work, lwork); #else cgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info); #endif return info; } template <> int64_t ref_gels(char trans, int64_t m, int64_t n, int64_t nrhs, 
hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* B, int64_t ldb, hipblasDoubleComplex* work, int64_t lwork) { int64_t info; #ifdef FLA_ENABLE_ILP64 info = LAPACKE_zgels_work(LAPACK_COL_MAJOR, trans, m, n, nrhs, (lapack_complex_double*)A, lda, (lapack_complex_double*)B, ldb, (lapack_complex_double*)work, lwork); #else zgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info); #endif return info; } #endif hipBLAS-rocm-6.4.3/clients/common/clients_common.cpp000066400000000000000000001677151500474223100223710ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "hipblas.hpp" #include "argument_model.hpp" #include "hipblas_data.hpp" #include "hipblas_datatype2string.hpp" #include "hipblas_parse_data.hpp" #include "hipblas_test.hpp" #include "test_cleanup.hpp" #include "type_dispatch.hpp" #include "utility.h" #include #include #include #include #include #include #include #include #include #include // aux #include "auxil/testing_set_get_matrix.hpp" #include "auxil/testing_set_get_matrix_async.hpp" #include "auxil/testing_set_get_vector.hpp" #include "auxil/testing_set_get_vector_async.hpp" // blas1 #include "blas1/testing_asum.hpp" #include "blas1/testing_asum_batched.hpp" #include "blas1/testing_asum_strided_batched.hpp" #include "blas1/testing_axpy.hpp" #include "blas1/testing_axpy_batched.hpp" #include "blas1/testing_axpy_strided_batched.hpp" #include "blas1/testing_copy.hpp" #include "blas1/testing_copy_batched.hpp" #include "blas1/testing_copy_strided_batched.hpp" #include "blas1/testing_dot.hpp" #include "blas1/testing_dot_batched.hpp" #include "blas1/testing_dot_strided_batched.hpp" #include "blas1/testing_iamax_iamin.hpp" #include "blas1/testing_iamax_iamin_batched.hpp" #include "blas1/testing_iamax_iamin_strided_batched.hpp" #include "blas1/testing_nrm2.hpp" #include "blas1/testing_nrm2_batched.hpp" #include "blas1/testing_nrm2_strided_batched.hpp" #include "blas1/testing_rot.hpp" #include "blas1/testing_rot_batched.hpp" #include "blas1/testing_rot_strided_batched.hpp" #include "blas1/testing_rotg.hpp" #include "blas1/testing_rotg_batched.hpp" #include "blas1/testing_rotg_strided_batched.hpp" #include "blas1/testing_rotm.hpp" #include "blas1/testing_rotm_batched.hpp" #include "blas1/testing_rotm_strided_batched.hpp" #include "blas1/testing_rotmg.hpp" #include "blas1/testing_rotmg_batched.hpp" #include "blas1/testing_rotmg_strided_batched.hpp" #include "blas1/testing_scal.hpp" #include "blas1/testing_scal_batched.hpp" #include 
"blas1/testing_scal_strided_batched.hpp" #include "blas1/testing_swap.hpp" #include "blas1/testing_swap_batched.hpp" #include "blas1/testing_swap_strided_batched.hpp" // blas2 #include "blas2/testing_gbmv.hpp" #include "blas2/testing_gbmv_batched.hpp" #include "blas2/testing_gbmv_strided_batched.hpp" #include "blas2/testing_gemv.hpp" #include "blas2/testing_gemv_batched.hpp" #include "blas2/testing_gemv_strided_batched.hpp" #include "blas2/testing_ger.hpp" #include "blas2/testing_ger_batched.hpp" #include "blas2/testing_ger_strided_batched.hpp" #include "blas2/testing_hbmv.hpp" #include "blas2/testing_hbmv_batched.hpp" #include "blas2/testing_hbmv_strided_batched.hpp" #include "blas2/testing_hemv.hpp" #include "blas2/testing_hemv_batched.hpp" #include "blas2/testing_hemv_strided_batched.hpp" #include "blas2/testing_her.hpp" #include "blas2/testing_her2.hpp" #include "blas2/testing_her2_batched.hpp" #include "blas2/testing_her2_strided_batched.hpp" #include "blas2/testing_her_batched.hpp" #include "blas2/testing_her_strided_batched.hpp" #include "blas2/testing_hpmv.hpp" #include "blas2/testing_hpmv_batched.hpp" #include "blas2/testing_hpmv_strided_batched.hpp" #include "blas2/testing_hpr.hpp" #include "blas2/testing_hpr2.hpp" #include "blas2/testing_hpr2_batched.hpp" #include "blas2/testing_hpr2_strided_batched.hpp" #include "blas2/testing_hpr_batched.hpp" #include "blas2/testing_hpr_strided_batched.hpp" #include "blas2/testing_sbmv.hpp" #include "blas2/testing_sbmv_batched.hpp" #include "blas2/testing_sbmv_strided_batched.hpp" #include "blas2/testing_spmv.hpp" #include "blas2/testing_spmv_batched.hpp" #include "blas2/testing_spmv_strided_batched.hpp" #include "blas2/testing_spr.hpp" #include "blas2/testing_spr2.hpp" #include "blas2/testing_spr2_batched.hpp" #include "blas2/testing_spr2_strided_batched.hpp" #include "blas2/testing_spr_batched.hpp" #include "blas2/testing_spr_strided_batched.hpp" #include "blas2/testing_symv.hpp" #include 
"blas2/testing_symv_batched.hpp" #include "blas2/testing_symv_strided_batched.hpp" #include "blas2/testing_syr.hpp" #include "blas2/testing_syr2.hpp" #include "blas2/testing_syr2_batched.hpp" #include "blas2/testing_syr2_strided_batched.hpp" #include "blas2/testing_syr_batched.hpp" #include "blas2/testing_syr_strided_batched.hpp" #include "blas2/testing_tbmv.hpp" #include "blas2/testing_tbmv_batched.hpp" #include "blas2/testing_tbmv_strided_batched.hpp" #include "blas2/testing_tbsv.hpp" #include "blas2/testing_tbsv_batched.hpp" #include "blas2/testing_tbsv_strided_batched.hpp" #include "blas2/testing_tpmv.hpp" #include "blas2/testing_tpmv_batched.hpp" #include "blas2/testing_tpmv_strided_batched.hpp" #include "blas2/testing_tpsv.hpp" #include "blas2/testing_tpsv_batched.hpp" #include "blas2/testing_tpsv_strided_batched.hpp" #include "blas2/testing_trmv.hpp" #include "blas2/testing_trmv_batched.hpp" #include "blas2/testing_trmv_strided_batched.hpp" #include "blas2/testing_trsv.hpp" #include "blas2/testing_trsv_batched.hpp" #include "blas2/testing_trsv_strided_batched.hpp" // blas3 #include "blas3/testing_dgmm.hpp" #include "blas3/testing_dgmm_batched.hpp" #include "blas3/testing_dgmm_strided_batched.hpp" #include "blas3/testing_geam.hpp" #include "blas3/testing_geam_batched.hpp" #include "blas3/testing_geam_strided_batched.hpp" #include "blas3/testing_gemm.hpp" #include "blas3/testing_gemm_batched.hpp" #include "blas3/testing_gemm_strided_batched.hpp" #include "blas3/testing_hemm.hpp" #include "blas3/testing_hemm_batched.hpp" #include "blas3/testing_hemm_strided_batched.hpp" #include "blas3/testing_her2k.hpp" #include "blas3/testing_her2k_batched.hpp" #include "blas3/testing_her2k_strided_batched.hpp" #include "blas3/testing_herk.hpp" #include "blas3/testing_herk_batched.hpp" #include "blas3/testing_herk_strided_batched.hpp" #include "blas3/testing_herkx.hpp" #include "blas3/testing_herkx_batched.hpp" #include "blas3/testing_herkx_strided_batched.hpp" #include 
"blas3/testing_symm.hpp" #include "blas3/testing_symm_batched.hpp" #include "blas3/testing_symm_strided_batched.hpp" #include "blas3/testing_syr2k.hpp" #include "blas3/testing_syr2k_batched.hpp" #include "blas3/testing_syr2k_strided_batched.hpp" #include "blas3/testing_syrk.hpp" #include "blas3/testing_syrk_batched.hpp" #include "blas3/testing_syrk_strided_batched.hpp" #include "blas3/testing_syrkx.hpp" #include "blas3/testing_syrkx_batched.hpp" #include "blas3/testing_syrkx_strided_batched.hpp" #include "blas3/testing_trmm.hpp" #include "blas3/testing_trmm_batched.hpp" #include "blas3/testing_trmm_strided_batched.hpp" #include "blas3/testing_trsm.hpp" #include "blas3/testing_trsm_batched.hpp" #include "blas3/testing_trsm_strided_batched.hpp" #include "blas3/testing_trtri.hpp" #include "blas3/testing_trtri_batched.hpp" #include "blas3/testing_trtri_strided_batched.hpp" #include "syrkx_reference.hpp" // blas_ex #include "blas_ex/testing_axpy_batched_ex.hpp" #include "blas_ex/testing_axpy_ex.hpp" #include "blas_ex/testing_axpy_strided_batched_ex.hpp" #include "blas_ex/testing_dot_batched_ex.hpp" #include "blas_ex/testing_dot_ex.hpp" #include "blas_ex/testing_dot_strided_batched_ex.hpp" #include "blas_ex/testing_gemm_batched_ex.hpp" #include "blas_ex/testing_gemm_ex.hpp" #include "blas_ex/testing_gemm_strided_batched_ex.hpp" #include "blas_ex/testing_nrm2_batched_ex.hpp" #include "blas_ex/testing_nrm2_ex.hpp" #include "blas_ex/testing_nrm2_strided_batched_ex.hpp" #include "blas_ex/testing_rot_batched_ex.hpp" #include "blas_ex/testing_rot_ex.hpp" #include "blas_ex/testing_rot_strided_batched_ex.hpp" #include "blas_ex/testing_scal_batched_ex.hpp" #include "blas_ex/testing_scal_ex.hpp" #include "blas_ex/testing_scal_strided_batched_ex.hpp" #include "blas_ex/testing_trsm_batched_ex.hpp" #include "blas_ex/testing_trsm_ex.hpp" #include "blas_ex/testing_trsm_strided_batched_ex.hpp" // solver functions #ifdef __HIP_PLATFORM_SOLVER__ #include "solver/testing_gels.hpp" #include 
"solver/testing_gels_batched.hpp" #include "solver/testing_gels_strided_batched.hpp" #include "solver/testing_geqrf.hpp" #include "solver/testing_geqrf_batched.hpp" #include "solver/testing_geqrf_strided_batched.hpp" #include "solver/testing_getrf.hpp" #include "solver/testing_getrf_batched.hpp" #include "solver/testing_getrf_npvt.hpp" #include "solver/testing_getrf_npvt_batched.hpp" #include "solver/testing_getrf_npvt_strided_batched.hpp" #include "solver/testing_getrf_strided_batched.hpp" #include "solver/testing_getri_batched.hpp" #include "solver/testing_getri_npvt_batched.hpp" #include "solver/testing_getrs.hpp" #include "solver/testing_getrs_batched.hpp" #include "solver/testing_getrs_strided_batched.hpp" #endif #include "utility.h" #include #undef I //using namespace roc; // For emulated program_options using namespace std::literals; // For std::string literals of form "str"s struct str_less { bool operator()(const char* a, const char* b) const { return strcmp(a, b) < 0; } }; // Map from const char* to function taking const Arguments& using comparison above using func_map = std::map; // Run a function by using map to map arg.function to function void run_function(const func_map& map, const Arguments& arg, const std::string& msg = "") { auto match = map.find(arg.function); if(match == map.end()) throw std::invalid_argument("Invalid combination --function "s + arg.function + " --a_type "s + hipblas_datatype2string(arg.a_type) + msg); match->second(arg); } void get_test_name(const Arguments& arg, std::string& name) { // Map from const char* to function taking const Arguments& using comparison above using name_to_f_testname_map = std::map; static const name_to_f_testname_map fmap = { // L1 {"asum", testname_asum}, {"asum_batched", testname_asum_batched}, {"asum_strided_batched", testname_asum_strided_batched}, {"axpy", testname_axpy}, {"axpy_batched", testname_axpy_batched}, {"axpy_strided_batched", testname_axpy_strided_batched}, {"axpy_ex", testname_axpy_ex}, 
{"axpy_batched_ex", testname_axpy_batched_ex}, {"axpy_strided_batched_ex", testname_axpy_strided_batched_ex}, {"copy", testname_copy}, {"copy_batched", testname_copy_batched}, {"copy_strided_batched", testname_copy_strided_batched}, {"dot", testname_dot}, {"dot_batched", testname_dot_batched}, {"dot_strided_batched", testname_dot_strided_batched}, {"dotc", testname_dotc}, {"dotc_batched", testname_dotc_batched}, {"dotc_strided_batched", testname_dotc_strided_batched}, {"iamax", testname_iamax}, {"iamax_batched", testname_iamax_batched}, {"iamax_strided_batched", testname_iamax_strided_batched}, {"iamin", testname_iamin}, {"iamin_batched", testname_iamin_batched}, {"iamin_strided_batched", testname_iamin_strided_batched}, {"nrm2", testname_nrm2}, {"nrm2_batched", testname_nrm2_batched}, {"nrm2_strided_batched", testname_nrm2_strided_batched}, {"nrm2_ex", testname_nrm2_ex}, {"nrm2_batched_ex", testname_nrm2_batched_ex}, {"nrm2_strided_batched_ex", testname_nrm2_strided_batched_ex}, {"rot", testname_rot}, {"rot_batched", testname_rot_batched}, {"rot_strided_batched", testname_rot_strided_batched}, {"rot_ex", testname_rot_ex}, {"rot_batched_ex", testname_rot_batched_ex}, {"rot_strided_batched_ex", testname_rot_strided_batched_ex}, {"rotg", testname_rotg}, {"rotg_batched", testname_rotg_batched}, {"rotg_strided_batched", testname_rotg_strided_batched}, {"rotm", testname_rotm}, {"rotm_batched", testname_rotm_batched}, {"rotm_strided_batched", testname_rotm_strided_batched}, {"rotmg", testname_rotmg}, {"rotmg_batched", testname_rotmg_batched}, {"rotmg_strided_batched", testname_rotmg_strided_batched}, {"swap", testname_swap}, {"swap_batched", testname_swap_batched}, {"swap_strided_batched", testname_swap_strided_batched}, {"scal", testname_scal}, {"scal_batched", testname_scal_batched}, {"scal_strided_batched", testname_scal_strided_batched}, {"scal_ex", testname_scal_ex}, {"scal_batched_ex", testname_scal_batched_ex}, {"scal_strided_batched_ex", 
testname_scal_strided_batched_ex}, // L2 {"gbmv", testname_gbmv}, {"gbmv_batched", testname_gbmv_batched}, {"gbmv_strided_batched", testname_gbmv_strided_batched}, {"gemv", testname_gemv}, {"gemv_batched", testname_gemv_batched}, {"gemv_strided_batched", testname_gemv_strided_batched}, {"ger", testname_ger}, {"ger_batched", testname_ger_batched}, {"ger_strided_batched", testname_ger_strided_batched}, {"geru", testname_ger}, {"geru_batched", testname_ger_batched}, {"geru_strided_batched", testname_ger_strided_batched}, {"gerc", testname_ger}, {"gerc_batched", testname_ger_batched}, {"gerc_strided_batched", testname_ger_strided_batched}, {"hbmv", testname_hbmv}, {"hbmv_batched", testname_hbmv_batched}, {"hbmv_strided_batched", testname_hbmv_strided_batched}, {"hemv", testname_hemv}, {"hemv_batched", testname_hemv_batched}, {"hemv_strided_batched", testname_hemv_strided_batched}, {"her", testname_her}, {"her_batched", testname_her_batched}, {"her_strided_batched", testname_her_strided_batched}, {"her2", testname_her2}, {"her2_batched", testname_her2_batched}, {"her2_strided_batched", testname_her2_strided_batched}, {"hpmv", testname_hpmv}, {"hpmv_batched", testname_hpmv_batched}, {"hpmv_strided_batched", testname_hpmv_strided_batched}, {"hpr", testname_hpr}, {"hpr_batched", testname_hpr_batched}, {"hpr_strided_batched", testname_hpr_strided_batched}, {"hpr2", testname_hpr2}, {"hpr2_batched", testname_hpr2_batched}, {"hpr2_strided_batched", testname_hpr2_strided_batched}, {"sbmv", testname_sbmv}, {"sbmv_batched", testname_sbmv_batched}, {"sbmv_strided_batched", testname_sbmv_strided_batched}, {"spmv", testname_spmv}, {"spmv_batched", testname_spmv_batched}, {"spmv_strided_batched", testname_spmv_strided_batched}, {"spr", testname_spr}, {"spr_batched", testname_spr_batched}, {"spr_strided_batched", testname_spr_strided_batched}, {"spr2", testname_spr2}, {"spr2_batched", testname_spr2_batched}, {"spr2_strided_batched", testname_spr2_strided_batched}, {"symv", 
testname_symv}, {"symv_batched", testname_symv_batched}, {"symv_strided_batched", testname_symv_strided_batched}, {"syr", testname_syr}, {"syr_batched", testname_syr_batched}, {"syr_strided_batched", testname_syr_strided_batched}, {"syr2", testname_syr2}, {"syr2_batched", testname_syr2_batched}, {"syr2_strided_batched", testname_syr2_strided_batched}, {"tbmv", testname_tbmv}, {"tbmv_batched", testname_tbmv_batched}, {"tbmv_strided_batched", testname_tbmv_strided_batched}, {"tbsv", testname_tbsv}, {"tbsv_batched", testname_tbsv_batched}, {"tbsv_strided_batched", testname_tbsv_strided_batched}, {"tpmv", testname_tpmv}, {"tpmv_batched", testname_tpmv_batched}, {"tpmv_strided_batched", testname_tpmv_strided_batched}, {"tpsv", testname_tpsv}, {"tpsv_batched", testname_tpsv_batched}, {"tpsv_strided_batched", testname_tpsv_strided_batched}, {"trmv", testname_trmv}, {"trmv_batched", testname_trmv_batched}, {"trmv_strided_batched", testname_trmv_strided_batched}, {"trsv", testname_trsv}, {"trsv_batched", testname_trsv_batched}, {"trsv_strided_batched", testname_trsv_strided_batched}, // L3 {"dgmm", testname_dgmm}, {"dgmm_batched", testname_dgmm_batched}, {"dgmm_strided_batched", testname_dgmm_strided_batched}, {"geam", testname_geam}, {"geam_batched", testname_geam_batched}, {"geam_strided_batched", testname_geam_strided_batched}, {"gemm", testname_gemm}, {"gemm_batched", testname_gemm_batched}, {"gemm_strided_batched", testname_gemm_strided_batched}, {"gemm_ex", testname_gemm_ex}, {"gemm_batched_ex", testname_gemm_batched_ex}, {"gemm_strided_batched_ex", testname_gemm_strided_batched_ex}, {"hemm", testname_hemm}, {"hemm_batched", testname_hemm_batched}, {"hemm_strided_batched", testname_hemm_strided_batched}, {"herk", testname_herk}, {"herk_batched", testname_herk_batched}, {"herk_strided_batched", testname_herk_strided_batched}, {"her2k", testname_her2k}, {"her2k_batched", testname_her2k_batched}, {"her2k_strided_batched", testname_her2k_strided_batched}, {"herkx", 
testname_herkx}, {"herkx_batched", testname_herkx_batched}, {"herkx_strided_batched", testname_herkx_strided_batched}, {"symm", testname_symm}, {"symm_batched", testname_symm_batched}, {"symm_strided_batched", testname_symm_strided_batched}, {"syrk", testname_syrk}, {"syrk_batched", testname_syrk_batched}, {"syrk_strided_batched", testname_syrk_strided_batched}, {"syr2k", testname_syr2k}, {"syr2k_batched", testname_syr2k_batched}, {"syr2k_strided_batched", testname_syr2k_strided_batched}, {"syrkx", testname_syrkx}, {"syrkx_batched", testname_syrkx_batched}, {"syrkx_strided_batched", testname_syrkx_strided_batched}, {"trmm", testname_trmm}, {"trmm_batched", testname_trmm_batched}, {"trmm_strided_batched", testname_trmm_strided_batched}, {"trsm", testname_trsm}, {"trsm_batched", testname_trsm_batched}, {"trsm_strided_batched", testname_trsm_strided_batched}, {"trsm_ex", testname_trsm_ex}, {"trsm_batched_ex", testname_trsm_batched_ex}, {"trsm_strided_batched_ex", testname_trsm_strided_batched_ex}, {"trtri", testname_trtri}, {"trtri_batched", testname_trtri_batched}, {"trtri_strided_batched", testname_trtri_strided_batched}, #ifdef __HIP_PLATFORM_SOLVER__ {"geqrf", testname_geqrf}, {"geqrf_batched", testname_geqrf_batched}, {"geqrf_strided_batched", testname_geqrf_strided_batched}, {"getrf", testname_getrf}, {"getrf_batched", testname_getrf_batched}, {"getrf_strided_batched", testname_getrf_strided_batched}, {"getrf_npvt", testname_getrf_npvt}, {"getrf_npvt_batched", testname_getrf_npvt_batched}, {"getrf_npvt_strided_batched", testname_getrf_npvt_strided_batched}, {"getri_batched", testname_getri_batched}, {"getri_npvt_batched", testname_getri_npvt_batched}, {"getrs", testname_getrs}, {"getrs_batched", testname_getrs_batched}, {"getrs_strided_batched", testname_getrs_strided_batched}, {"gels", testname_gels}, {"gels_batched", testname_gels_batched}, {"gels_strided_batched", testname_gels_strided_batched}, #endif // Aux {"set_get_vector", testname_set_get_vector}, 
{"set_get_vector_async", testname_set_get_vector_async}, {"set_get_matrix", testname_set_get_matrix}, {"set_get_matrix_async", testname_set_get_matrix_async}, }; auto match = fmap.find(arg.function); if(match != fmap.end()) match->second(arg, name); } // Template to dispatch testing_gemm_ex for performance tests // When Ti == void or Ti == To == Tc == bfloat16, the test is marked invalid template struct perf_gemm_ex : hipblas_test_invalid { }; template struct perf_gemm_ex{} && !(std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"gemm_ex", testing_gemm_ex}, {"gemm_batched_ex", testing_gemm_batched_ex}, }; run_function(map, arg); } }; // Template to dispatch testing_gemm_strided_batched_ex for performance tests // When Ti == void or Ti == To == Tc == bfloat16, the test is marked invalid template struct perf_gemm_strided_batched_ex : hipblas_test_invalid { }; template struct perf_gemm_strided_batched_ex< Ti, To, Tc, std::enable_if_t{} && !(std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"gemm_strided_batched_ex", testing_gemm_strided_batched_ex}, }; run_function(map, arg); } }; template struct perf_blas : hipblas_test_invalid { }; template struct perf_blas{} || std::is_same{}>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map fmap = { // L1 {"asum", testing_asum}, {"asum_batched", testing_asum_batched}, {"asum_strided_batched", testing_asum_strided_batched}, {"axpy", testing_axpy}, {"axpy_batched", testing_axpy_batched}, {"axpy_strided_batched", testing_axpy_strided_batched}, {"copy", testing_copy}, {"copy_batched", testing_copy_batched}, {"copy_strided_batched", testing_copy_strided_batched}, {"dot", testing_dot}, {"dot_batched", testing_dot_batched}, {"dot_strided_batched", testing_dot_strided_batched}, {"iamax", testing_iamax}, 
{"iamax_batched", testing_iamax_batched}, {"iamax_strided_batched", testing_iamax_strided_batched}, {"iamin", testing_iamin}, {"iamin_batched", testing_iamin_batched}, {"iamin_strided_batched", testing_iamin_strided_batched}, {"nrm2", testing_nrm2}, {"nrm2_batched", testing_nrm2_batched}, {"nrm2_strided_batched", testing_nrm2_strided_batched}, {"rotg", testing_rotg}, {"rotg_batched", testing_rotg_batched}, {"rotg_strided_batched", testing_rotg_strided_batched}, {"rotm", testing_rotm}, {"rotm_batched", testing_rotm_batched}, {"rotm_strided_batched", testing_rotm_strided_batched}, {"rotmg", testing_rotmg}, {"rotmg_batched", testing_rotmg_batched}, {"rotmg_strided_batched", testing_rotmg_strided_batched}, {"swap", testing_swap}, {"swap_batched", testing_swap_batched}, {"swap_strided_batched", testing_swap_strided_batched}, {"scal", testing_scal}, {"scal_batched", testing_scal_batched}, {"scal_strided_batched", testing_scal_strided_batched}, // L2 {"gbmv", testing_gbmv}, {"gbmv_batched", testing_gbmv_batched}, {"gbmv_strided_batched", testing_gbmv_strided_batched}, {"gemv", testing_gemv}, {"gemv_batched", testing_gemv_batched}, {"gemv_strided_batched", testing_gemv_strided_batched}, {"ger", testing_ger}, {"ger_batched", testing_ger_batched}, {"ger_strided_batched", testing_ger_strided_batched}, {"sbmv", testing_sbmv}, {"sbmv_batched", testing_sbmv_batched}, {"sbmv_strided_batched", testing_sbmv_strided_batched}, {"spmv", testing_spmv}, {"spmv_batched", testing_spmv_batched}, {"spmv_strided_batched", testing_spmv_strided_batched}, {"spr", testing_spr}, {"spr_batched", testing_spr_batched}, {"spr_strided_batched", testing_spr_strided_batched}, {"spr2", testing_spr2}, {"spr2_batched", testing_spr2_batched}, {"spr2_strided_batched", testing_spr2_strided_batched}, {"symv", testing_symv}, {"symv_batched", testing_symv_batched}, {"symv_strided_batched", testing_symv_strided_batched}, {"syr", testing_syr}, {"syr_batched", testing_syr_batched}, {"syr_strided_batched", 
testing_syr_strided_batched}, {"syr2", testing_syr2}, {"syr2_batched", testing_syr2_batched}, {"syr2_strided_batched", testing_syr2_strided_batched}, {"tbmv", testing_tbmv}, {"tbmv_batched", testing_tbmv_batched}, {"tbmv_strided_batched", testing_tbmv_strided_batched}, {"tbsv", testing_tbsv}, {"tbsv_batched", testing_tbsv_batched}, {"tbsv_strided_batched", testing_tbsv_strided_batched}, {"tpmv", testing_tpmv}, {"tpmv_batched", testing_tpmv_batched}, {"tpmv_strided_batched", testing_tpmv_strided_batched}, {"tpsv", testing_tpsv}, {"tpsv_batched", testing_tpsv_batched}, {"tpsv_strided_batched", testing_tpsv_strided_batched}, {"trmv", testing_trmv}, {"trmv_batched", testing_trmv_batched}, {"trmv_strided_batched", testing_trmv_strided_batched}, {"trsv", testing_trsv}, {"trsv_batched", testing_trsv_batched}, {"trsv_strided_batched", testing_trsv_strided_batched}, // L3 {"geam", testing_geam}, {"geam_batched", testing_geam_batched}, {"geam_strided_batched", testing_geam_strided_batched}, {"dgmm", testing_dgmm}, {"dgmm_batched", testing_dgmm_batched}, {"dgmm_strided_batched", testing_dgmm_strided_batched}, {"trmm", testing_trmm}, {"trmm_batched", testing_trmm_batched}, {"trmm_strided_batched", testing_trmm_strided_batched}, {"gemm", testing_gemm}, {"gemm_batched", testing_gemm_batched}, {"gemm_strided_batched", testing_gemm_strided_batched}, {"symm", testing_symm}, {"symm_batched", testing_symm_batched}, {"symm_strided_batched", testing_symm_strided_batched}, {"syrk", testing_syrk}, {"syrk_batched", testing_syrk_batched}, {"syrk_strided_batched", testing_syrk_strided_batched}, {"syr2k", testing_syr2k}, {"syr2k_batched", testing_syr2k_batched}, {"syr2k_strided_batched", testing_syr2k_strided_batched}, {"trtri", testing_trtri}, {"trtri_batched", testing_trtri_batched}, {"trtri_strided_batched", testing_trtri_strided_batched}, {"syrkx", testing_syrkx}, {"syrkx_batched", testing_syrkx_batched}, {"syrkx_strided_batched", testing_syrkx_strided_batched}, {"trsm", testing_trsm}, 
{"trsm_ex", testing_trsm_ex}, {"trsm_batched", testing_trsm_batched}, {"trsm_batched_ex", testing_trsm_batched_ex}, {"trsm_strided_batched", testing_trsm_strided_batched}, {"trsm_strided_batched_ex", testing_trsm_strided_batched_ex}, #ifdef __HIP_PLATFORM_SOLVER__ {"geqrf", testing_geqrf}, {"geqrf_batched", testing_geqrf_batched}, {"geqrf_strided_batched", testing_geqrf_strided_batched}, {"getrf", testing_getrf}, {"getrf_batched", testing_getrf_batched}, {"getrf_strided_batched", testing_getrf_strided_batched}, {"getrf_npvt", testing_getrf_npvt}, {"getrf_npvt_batched", testing_getrf_npvt_batched}, {"getrf_npvt_strided_batched", testing_getrf_npvt_strided_batched}, {"getri_batched", testing_getri_batched}, {"getri_npvt_batched", testing_getri_npvt_batched}, {"getrs", testing_getrs}, {"getrs_batched", testing_getrs_batched}, {"getrs_strided_batched", testing_getrs_strided_batched}, {"gels", testing_gels}, {"gels_batched", testing_gels_batched}, {"gels_strided_batched", testing_gels_strided_batched}, #endif // Aux {"set_get_vector", testing_set_get_vector}, {"set_get_vector_async", testing_set_get_vector_async}, {"set_get_matrix", testing_set_get_matrix}, {"set_get_matrix_async", testing_set_get_matrix_async}, }; run_function(fmap, arg); } }; template struct perf_blas{}>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"dot", testing_dot}, {"dot_batched", testing_dot_batched}, {"dot_strided_batched", testing_dot_strided_batched}, }; run_function(map, arg); } }; template struct perf_blas{}>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"axpy", testing_axpy}, {"axpy_batched", testing_axpy_batched}, {"axpy_strided_batched", testing_axpy_strided_batched}, {"dot", testing_dot}, {"dot_batched", testing_dot_batched}, {"dot_strided_batched", testing_dot_strided_batched}, {"gemm", testing_gemm}, {"gemm_batched", testing_gemm_batched}, {"gemm_strided_batched", 
testing_gemm_strided_batched}, }; run_function(map, arg); } }; template struct perf_blas< T, U, std::enable_if_t{} || std::is_same{}>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { // L1 {"asum", testing_asum}, {"asum_batched", testing_asum_batched}, {"asum_strided_batched", testing_asum_strided_batched}, {"axpy", testing_axpy}, {"axpy_batched", testing_axpy_batched}, {"axpy_strided_batched", testing_axpy_strided_batched}, {"copy", testing_copy}, {"copy_batched", testing_copy_batched}, {"copy_strided_batched", testing_copy_strided_batched}, {"dot", testing_dot}, {"dot_batched", testing_dot_batched}, {"dot_strided_batched", testing_dot_strided_batched}, {"dotc", testing_dotc}, {"dotc_batched", testing_dotc_batched}, {"dotc_strided_batched", testing_dotc_strided_batched}, {"iamax", testing_iamax}, {"iamax_batched", testing_iamax_batched}, {"iamax_strided_batched", testing_iamax_strided_batched}, {"iamin", testing_iamin}, {"iamin_batched", testing_iamin_batched}, {"iamin_strided_batched", testing_iamin_strided_batched}, {"nrm2", testing_nrm2}, {"nrm2_batched", testing_nrm2_batched}, {"nrm2_strided_batched", testing_nrm2_strided_batched}, {"rotg", testing_rotg}, {"rotg_batched", testing_rotg_batched}, {"rotg_strided_batched", testing_rotg_strided_batched}, {"swap", testing_swap}, {"swap_batched", testing_swap_batched}, {"swap_strided_batched", testing_swap_strided_batched}, {"scal", testing_scal}, {"scal_batched", testing_scal_batched}, {"scal_strided_batched", testing_scal_strided_batched}, // L2 {"gemv", testing_gemv}, {"gemv_batched", testing_gemv_batched}, {"gemv_strided_batched", testing_gemv_strided_batched}, {"gbmv", testing_gbmv}, {"gbmv_batched", testing_gbmv_batched}, {"gbmv_strided_batched", testing_gbmv_strided_batched}, {"geru", testing_ger}, {"geru_batched", testing_ger_batched}, {"geru_strided_batched", testing_ger_strided_batched}, {"gerc", testing_ger}, {"gerc_batched", testing_ger_batched}, 
{"gerc_strided_batched", testing_ger_strided_batched}, {"hbmv", testing_hbmv}, {"hbmv_batched", testing_hbmv_batched}, {"hbmv_strided_batched", testing_hbmv_strided_batched}, {"hemv", testing_hemv}, {"hemv_batched", testing_hemv_batched}, {"hemv_strided_batched", testing_hemv_strided_batched}, {"her", testing_her}, {"her_batched", testing_her_batched}, {"her_strided_batched", testing_her_strided_batched}, {"her2", testing_her2}, {"her2_batched", testing_her2_batched}, {"her2_strided_batched", testing_her2_strided_batched}, {"hpmv", testing_hpmv}, {"hpmv_batched", testing_hpmv_batched}, {"hpmv_strided_batched", testing_hpmv_strided_batched}, {"hpr", testing_hpr}, {"hpr_batched", testing_hpr_batched}, {"hpr_strided_batched", testing_hpr_strided_batched}, {"hpr2", testing_hpr2}, {"hpr2_batched", testing_hpr2_batched}, {"hpr2_strided_batched", testing_hpr2_strided_batched}, {"spr", testing_spr}, {"spr_batched", testing_spr_batched}, {"spr_strided_batched", testing_spr_strided_batched}, {"symv", testing_symv}, {"symv_batched", testing_symv_batched}, {"symv_strided_batched", testing_symv_strided_batched}, {"syr", testing_syr}, {"syr_batched", testing_syr_batched}, {"syr_strided_batched", testing_syr_strided_batched}, {"syr2", testing_syr2}, {"syr2_batched", testing_syr2_batched}, {"syr2_strided_batched", testing_syr2_strided_batched}, {"tbmv", testing_tbmv}, {"tbmv_batched", testing_tbmv_batched}, {"tbmv_strided_batched", testing_tbmv_strided_batched}, {"tbsv", testing_tbsv}, {"tbsv_batched", testing_tbsv_batched}, {"tbsv_strided_batched", testing_tbsv_strided_batched}, {"tpmv", testing_tpmv}, {"tpmv_batched", testing_tpmv_batched}, {"tpmv_strided_batched", testing_tpmv_strided_batched}, {"tpsv", testing_tpsv}, {"tpsv_batched", testing_tpsv_batched}, {"tpsv_strided_batched", testing_tpsv_strided_batched}, {"trmv", testing_trmv}, {"trmv_batched", testing_trmv_batched}, {"trmv_strided_batched", testing_trmv_strided_batched}, {"trsv", testing_trsv}, {"trsv_batched", 
testing_trsv_batched}, {"trsv_strided_batched", testing_trsv_strided_batched}, // L3 {"dgmm", testing_dgmm}, {"dgmm_batched", testing_dgmm_batched}, {"dgmm_strided_batched", testing_dgmm_strided_batched}, {"geam", testing_geam}, {"geam_batched", testing_geam_batched}, {"geam_strided_batched", testing_geam_strided_batched}, {"gemm", testing_gemm}, {"gemm_batched", testing_gemm_batched}, {"gemm_strided_batched", testing_gemm_strided_batched}, {"hemm", testing_hemm}, {"hemm_batched", testing_hemm_batched}, {"hemm_strided_batched", testing_hemm_strided_batched}, {"herk", testing_herk}, {"herk_batched", testing_herk_batched}, {"herk_strided_batched", testing_herk_strided_batched}, {"her2k", testing_her2k}, {"her2k_batched", testing_her2k_batched}, {"her2k_strided_batched", testing_her2k_strided_batched}, {"herkx", testing_herkx}, {"herkx_batched", testing_herkx_batched}, {"herkx_strided_batched", testing_herkx_strided_batched}, {"symm", testing_symm}, {"symm_batched", testing_symm_batched}, {"symm_strided_batched", testing_symm_strided_batched}, {"syrk", testing_syrk}, {"syrk_batched", testing_syrk_batched}, {"syrk_strided_batched", testing_syrk_strided_batched}, {"syr2k", testing_syr2k}, {"syr2k_batched", testing_syr2k_batched}, {"syr2k_strided_batched", testing_syr2k_strided_batched}, {"trtri", testing_trtri}, {"trtri_batched", testing_trtri_batched}, {"trtri_strided_batched", testing_trtri_strided_batched}, {"syrkx", testing_syrkx}, {"syrkx_batched", testing_syrkx_batched}, {"syrkx_strided_batched", testing_syrkx_strided_batched}, {"trsm", testing_trsm}, {"trsm_batched", testing_trsm_batched}, {"trsm_strided_batched", testing_trsm_strided_batched}, {"trsm_ex", testing_trsm_ex}, {"trsm_batched_ex", testing_trsm_batched_ex}, {"trsm_strided_batched_ex", testing_trsm_strided_batched_ex}, {"trmm", testing_trmm}, {"trmm_batched", testing_trmm_batched}, {"trmm_strided_batched", testing_trmm_strided_batched}, #ifdef __HIP_PLATFORM_SOLVER__ {"geqrf", testing_geqrf}, 
{"geqrf_batched", testing_geqrf_batched}, {"geqrf_strided_batched", testing_geqrf_strided_batched}, {"getrf", testing_getrf}, {"getrf_batched", testing_getrf_batched}, {"getrf_strided_batched", testing_getrf_strided_batched}, {"getrf_npvt", testing_getrf_npvt}, {"getrf_npvt_batched", testing_getrf_npvt_batched}, {"getrf_npvt_strided_batched", testing_getrf_npvt_strided_batched}, {"getri_batched", testing_getri_batched}, {"getri_npvt_batched", testing_getri_npvt_batched}, {"getrs", testing_getrs}, {"getrs_batched", testing_getrs_batched}, {"getrs_strided_batched", testing_getrs_strided_batched}, {"gels", testing_gels}, {"gels_batched", testing_gels_batched}, {"gels_strided_batched", testing_gels_strided_batched}, #endif }; run_function(map, arg); } }; template struct perf_blas_axpy_ex : hipblas_test_invalid { }; template struct perf_blas_axpy_ex< Ta, Tx, Ty, Tex, std::enable_if_t< (std::is_same_v< Ta, float> && std::is_same_v && std::is_same_v && std::is_same_v) || (std::is_same_v< Ta, double> && std::is_same_v && std::is_same_v && std::is_same_v) || (std::is_same_v< Ta, hipblasHalf> && std::is_same_v && std::is_same_v && std::is_same_v) || (std::is_same_v< Ta, hipblasComplex> && std::is_same_v && std::is_same_v && std::is_same_v) || (std::is_same_v< Ta, hipblasDoubleComplex> && std::is_same_v && std::is_same_v && std::is_same_v) || (std::is_same_v< Ta, hipblasHalf> && std::is_same_v && std::is_same_v && std::is_same_v) || (std::is_same_v< Ta, float> && std::is_same_v && std::is_same_v && std::is_same_v) || (std::is_same_v< Ta, hipblasBfloat16> && std::is_same_v && std::is_same_v && std::is_same_v) || (std::is_same_v< Ta, float> && std::is_same_v && std::is_same_v && std::is_same_v)>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"axpy_ex", testing_axpy_ex}, {"axpy_batched_ex", testing_axpy_batched_ex}, {"axpy_strided_batched_ex", testing_axpy_strided_batched_ex}, }; run_function(map, arg); } }; template struct 
perf_blas_dot_ex : hipblas_test_invalid { }; template struct perf_blas_dot_ex< Tx, Ty, Tr, Tex, std::enable_if_t<(std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"dot_ex", testing_dot_ex}, {"dot_batched_ex", testing_dot_batched_ex}, {"dot_strided_batched_ex", testing_dot_strided_batched_ex}, {"dotc_ex", testing_dot_ex}, {"dotc_batched_ex", testing_dot_batched_ex}, {"dotc_strided_batched_ex", testing_dot_strided_batched_ex}, }; run_function(map, arg); } }; template struct perf_blas_nrm2_ex : hipblas_test_invalid { }; template struct perf_blas_nrm2_ex< Tx, Tr, Tex, std::enable_if_t< (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"nrm2_ex", testing_nrm2_ex}, {"nrm2_batched_ex", testing_nrm2_batched_ex}, {"nrm2_strided_batched_ex", testing_nrm2_strided_batched_ex}, }; run_function(map, arg); } }; template struct perf_blas_rot_ex : hipblas_test_invalid { }; template struct perf_blas_rot_ex< Tx, Ty, Tcs, Tex, std::enable_if_t<(std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && 
std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"rot_ex", testing_rot_ex}, {"rot_batched_ex", testing_rot_batched_ex}, {"rot_strided_batched_ex", testing_rot_strided_batched_ex}, }; run_function(map, arg); } }; template struct perf_blas_rot : hipblas_test_invalid { }; template struct perf_blas_rot< Ti, To, Tc, std::enable_if_t<(std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"rot", testing_rot}, {"rot_batched", testing_rot_batched}, {"rot_strided_batched", testing_rot_strided_batched}, }; run_function(map, arg); } }; template struct perf_blas_scal : hipblas_test_invalid { }; template struct perf_blas_scal< Ta, Tb, std::enable_if_t<(std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"scal", testing_scal}, {"scal_batched", testing_scal_batched}, {"scal_strided_batched", testing_scal_strided_batched}, }; 
run_function(map, arg); } }; template struct perf_blas_scal_ex : hipblas_test_invalid { }; template struct perf_blas_scal_ex< Ta, Tx, Tex, std::enable_if_t< (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{})>> : hipblas_test_valid { void operator()(const Arguments& arg) { static const func_map map = { {"scal_ex", testing_scal_ex}, {"scal_batched_ex", testing_scal_batched_ex}, {"scal_strided_batched_ex", testing_scal_strided_batched_ex}, }; run_function(map, arg); } }; int run_bench_test(Arguments& arg, int unit_check, int timing) { //hipblas_initialize(); // Initialize rocBLAS std::cout << std::setiosflags(std::ios::fixed) << std::setprecision(7); // Set precision to 7 digits // disable unit_check in client benchmark, it is only used in gtest unit test arg.unit_check = unit_check; // enable timing check,otherwise no performance data collected arg.timing = timing; // Skip past any testing_ prefix in function static constexpr char prefix[] = "testing_"; const char* function = arg.function; if(!strncmp(function, prefix, sizeof(prefix) - 1)) function += sizeof(prefix) - 1; if(!strcmp(function, "gemm") || !strcmp(function, "gemm_batched")) { // adjust dimension for GEMM routines int64_t min_lda = arg.transA == 'N' ? arg.M : arg.K; int64_t min_ldb = arg.transB == 'N' ? 
arg.K : arg.N; int64_t min_ldc = arg.M; if(arg.lda < min_lda) { std::cout << "hipblas-bench INFO: lda < min_lda, set lda = " << min_lda << std::endl; arg.lda = min_lda; } if(arg.ldb < min_ldb) { std::cout << "hipblas-bench INFO: ldb < min_ldb, set ldb = " << min_ldb << std::endl; arg.ldb = min_ldb; } if(arg.ldc < min_ldc) { std::cout << "hipblas-bench INFO: ldc < min_ldc, set ldc = " << min_ldc << std::endl; arg.ldc = min_ldc; } } else if(!strcmp(function, "gemm_strided_batched")) { // adjust dimension for GEMM routines int64_t min_lda = arg.transA == 'N' ? arg.M : arg.K; int64_t min_ldb = arg.transB == 'N' ? arg.K : arg.N; int64_t min_ldc = arg.M; if(arg.lda < min_lda) { std::cout << "hipblas-bench INFO: lda < min_lda, set lda = " << min_lda << std::endl; arg.lda = min_lda; } if(arg.ldb < min_ldb) { std::cout << "hipblas-bench INFO: ldb < min_ldb, set ldb = " << min_ldb << std::endl; arg.ldb = min_ldb; } if(arg.ldc < min_ldc) { std::cout << "hipblas-bench INFO: ldc < min_ldc, set ldc = " << min_ldc << std::endl; arg.ldc = min_ldc; } // int64_t min_stride_a = // arg.transA == 'N' ? arg.K * arg.lda : arg.M * arg.lda; // int64_t min_stride_b = // arg.transB == 'N' ? arg.N * arg.ldb : arg.K * arg.ldb; // int64_t min_stride_a = // arg.transA == 'N' ? arg.K * arg.lda : arg.M * arg.lda; // int64_t min_stride_b = // arg.transB == 'N' ? 
arg.N * arg.ldb : arg.K * arg.ldb; int64_t min_stride_c = arg.ldc * arg.N; // if (arg.stride_a < min_stride_a) // { // std::cout << "hipblas-bench INFO: stride_a < min_stride_a, set stride_a = " << // min_stride_a << std::endl; // arg.stride_a = min_stride_a; // } // if (arg.stride_b < min_stride_b) // { // std::cout << "hipblas-bench INFO: stride_b < min_stride_b, set stride_b = " << // min_stride_b << std::endl; // arg.stride_b = min_stride_b; // } if(arg.stride_c < min_stride_c) { std::cout << "hipblas-bench INFO: stride_c < min_stride_c, set stride_c = " << min_stride_c << std::endl; arg.stride_c = min_stride_c; } } if(!strcmp(function, "gemm_ex") || !strcmp(function, "gemm_batched_ex")) { // adjust dimension for GEMM routines int64_t min_lda = arg.transA == 'N' ? arg.M : arg.K; int64_t min_ldb = arg.transB == 'N' ? arg.K : arg.N; int64_t min_ldc = arg.M; int64_t min_ldd = arg.M; if(arg.lda < min_lda) { std::cout << "hipblas-bench INFO: lda < min_lda, set lda = " << min_lda << std::endl; arg.lda = min_lda; } if(arg.ldb < min_ldb) { std::cout << "hipblas-bench INFO: ldb < min_ldb, set ldb = " << min_ldb << std::endl; arg.ldb = min_ldb; } if(arg.ldc < min_ldc) { std::cout << "hipblas-bench INFO: ldc < min_ldc, set ldc = " << min_ldc << std::endl; arg.ldc = min_ldc; } if(arg.ldd < min_ldd) { std::cout << "hipblas-bench INFO: ldd < min_ldd, set ldd = " << min_ldc << std::endl; arg.ldd = min_ldd; } hipblas_gemm_dispatch(arg); } else if(!strcmp(function, "gemm_strided_batched_ex")) { // adjust dimension for GEMM routines int64_t min_lda = arg.transA == 'N' ? arg.M : arg.K; int64_t min_ldb = arg.transB == 'N' ? 
arg.K : arg.N; int64_t min_ldc = arg.M; int64_t min_ldd = arg.M; if(arg.lda < min_lda) { std::cout << "hipblas-bench INFO: lda < min_lda, set lda = " << min_lda << std::endl; arg.lda = min_lda; } if(arg.ldb < min_ldb) { std::cout << "hipblas-bench INFO: ldb < min_ldb, set ldb = " << min_ldb << std::endl; arg.ldb = min_ldb; } if(arg.ldc < min_ldc) { std::cout << "hipblas-bench INFO: ldc < min_ldc, set ldc = " << min_ldc << std::endl; arg.ldc = min_ldc; } if(arg.ldd < min_ldd) { std::cout << "hipblas-bench INFO: ldd < min_ldd, set ldd = " << min_ldc << std::endl; arg.ldd = min_ldd; } int64_t min_stride_c = arg.ldc * arg.N; if(arg.stride_c < min_stride_c) { std::cout << "hipblas-bench INFO: stride_c < min_stride_c, set stride_c = " << min_stride_c << std::endl; arg.stride_c = min_stride_c; } hipblas_gemm_dispatch(arg); } else { if(!strcmp(function, "scal_ex") || !strcmp(function, "scal_batched_ex") || !strcmp(function, "scal_strided_batched_ex")) hipblas_blas1_ex_dispatch(arg); /* if(!strcmp(function, "scal") || !strcmp(function, "scal_batched") || !strcmp(function, "scal_strided_batched")) hipblas_blas1_dispatch(arg); */ else if(!strcmp(function, "rot") || !strcmp(function, "rot_batched") || !strcmp(function, "rot_strided_batched")) hipblas_rot_dispatch(arg); else if(!strcmp(function, "axpy_ex") || !strcmp(function, "axpy_batched_ex") || !strcmp(function, "axpy_strided_batched_ex")) hipblas_blas1_ex_dispatch(arg); else if(!strcmp(function, "dot_ex") || !strcmp(function, "dot_batched_ex") || !strcmp(function, "dot_strided_batched_ex") || !strcmp(function, "dotc_ex") || !strcmp(function, "dotc_batched_ex") || !strcmp(function, "dotc_strided_batched_ex")) hipblas_blas1_ex_dispatch(arg); else if(!strcmp(function, "nrm2_ex") || !strcmp(function, "nrm2_batched_ex") || !strcmp(function, "nrm2_strided_batched_ex")) hipblas_blas1_ex_dispatch(arg); else if(!strcmp(function, "rot_ex") || !strcmp(function, "rot_batched_ex") || !strcmp(function, "rot_strided_batched_ex")) 
hipblas_blas1_ex_dispatch(arg); else hipblas_simple_dispatch(arg); } return 0; } hipBLAS-rocm-6.4.3/clients/common/hipblas_arguments.cpp000066400000000000000000000114031500474223100230450ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
// Report that the binary test data file does not match the layout of the
// Arguments structure compiled into this client, then terminate.
//
// name: the Arguments field (or "header"/"trailer") whose signature bytes did
//       not match what the reader expected.
//
// Never returns: the message is written to std::cerr and the process aborts.
static void validation_error [[noreturn]] (const char* name)
{
    // The second sentence previously read "does match input format", which
    // stated the opposite of the failure being reported.
    std::cerr << "Arguments field \"" << name
              << "\" does not match format.\n\n"
                 "Fatal error: Binary test data does not match input format.\n"
                 "Ensure that hipblas_arguments.hpp and hipblas_common.yaml\n"
                 "define exactly the same Arguments, that hipblas_gentest.py\n"
                 "generates the data correctly, and that endianness is the same."
              << std::endl;
    abort();
}
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * * ************************************************************************ */ #include "hipblas_datatype2string.hpp" #include "hipblas.h" // clang-format off hipblas_initialization string2hipblas_initialization(const std::string& value) { return value == "rand_int" ? hipblas_initialization::rand_int : value == "trig_float" ? hipblas_initialization::trig_float : value == "hpl" ? hipblas_initialization::hpl : static_cast(0); // invalid enum } // clang-format on /* ============================================================================================ */ /* Convert hipblas constants to lapack char. */ char hipblas2char_operation(hipblasOperation_t value) { switch(value) { case HIPBLAS_OP_N: return 'N'; case HIPBLAS_OP_T: return 'T'; case HIPBLAS_OP_C: return 'C'; } return '\0'; } char hipblas2char_fill(hipblasFillMode_t value) { switch(value) { case HIPBLAS_FILL_MODE_UPPER: return 'U'; case HIPBLAS_FILL_MODE_LOWER: return 'L'; case HIPBLAS_FILL_MODE_FULL: return 'F'; } return '\0'; } char hipblas2char_diagonal(hipblasDiagType_t value) { switch(value) { case HIPBLAS_DIAG_UNIT: return 'U'; case HIPBLAS_DIAG_NON_UNIT: return 'N'; } return '\0'; } char hipblas2char_side(hipblasSideMode_t value) { switch(value) { case HIPBLAS_SIDE_LEFT: return 'L'; case HIPBLAS_SIDE_RIGHT: return 'R'; case HIPBLAS_SIDE_BOTH: return 'B'; } return '\0'; } /* ============================================================================================ */ /* Convert lapack char constants to hipblas type. 
*/ hipblasOperation_t char2hipblas_operation(char value) { switch(value) { case 'N': return HIPBLAS_OP_N; case 'T': return HIPBLAS_OP_T; case 'C': return HIPBLAS_OP_C; case 'n': return HIPBLAS_OP_N; case 't': return HIPBLAS_OP_T; case 'c': return HIPBLAS_OP_C; } return HIPBLAS_OP_N; } hipblasFillMode_t char2hipblas_fill(char value) { switch(value) { case 'U': return HIPBLAS_FILL_MODE_UPPER; case 'L': return HIPBLAS_FILL_MODE_LOWER; case 'u': return HIPBLAS_FILL_MODE_UPPER; case 'l': return HIPBLAS_FILL_MODE_LOWER; } return HIPBLAS_FILL_MODE_LOWER; } hipblasDiagType_t char2hipblas_diagonal(char value) { switch(value) { case 'U': return HIPBLAS_DIAG_UNIT; case 'N': return HIPBLAS_DIAG_NON_UNIT; case 'u': return HIPBLAS_DIAG_UNIT; case 'n': return HIPBLAS_DIAG_NON_UNIT; } return HIPBLAS_DIAG_NON_UNIT; } hipblasSideMode_t char2hipblas_side(char value) { switch(value) { case 'L': return HIPBLAS_SIDE_LEFT; case 'R': return HIPBLAS_SIDE_RIGHT; case 'l': return HIPBLAS_SIDE_LEFT; case 'r': return HIPBLAS_SIDE_RIGHT; } return HIPBLAS_SIDE_LEFT; } // clang-format off hipblasDatatype_t string2hipblas_datatype(const std::string& value) { return value == "f16_r" || value == "h" ? HIPBLAS_R_16F : value == "f32_r" || value == "s" ? HIPBLAS_R_32F : value == "f64_r" || value == "d" ? HIPBLAS_R_64F : value == "bf16_r" ? HIPBLAS_R_16B : value == "f16_c" ? HIPBLAS_C_16B : value == "f32_c" || value == "c" ? HIPBLAS_C_32F : value == "f64_c" || value == "z" ? HIPBLAS_C_64F : value == "bf16_c" ? HIPBLAS_C_16B : value == "i8_r" ? HIPBLAS_R_8I : value == "i32_r" ? HIPBLAS_R_32I : value == "i8_c" ? HIPBLAS_C_8I : value == "i32_c" ? HIPBLAS_C_32I : value == "u8_r" ? HIPBLAS_R_8U : value == "u32_r" ? HIPBLAS_R_32U : value == "u8_c" ? HIPBLAS_C_8U : value == "u32_c" ? HIPBLAS_C_32U : HIPBLAS_DATATYPE_INVALID; } hipblasComputeType_t string2hipblas_computetype(const std::string& value) { return value == "c16f" ? HIPBLAS_COMPUTE_16F : value == "c16f_pedantic" ? 
HIPBLAS_COMPUTE_16F_PEDANTIC : value == "c32f" ? HIPBLAS_COMPUTE_32F : value == "c32f_pedantic" ? HIPBLAS_COMPUTE_32F_PEDANTIC : value == "c32f_fast_16f" ? HIPBLAS_COMPUTE_32F_FAST_16F : value == "c32f_fast_16Bf" ? HIPBLAS_COMPUTE_32F_FAST_16BF : value == "c32f_fast_tf32" ? HIPBLAS_COMPUTE_32F_FAST_TF32 : value == "c64f" ? HIPBLAS_COMPUTE_64F : value == "c64f_pedantic" ? HIPBLAS_COMPUTE_64F_PEDANTIC : value == "c32i" ? HIPBLAS_COMPUTE_32I : value == "c32i_pedantic" ? HIPBLAS_COMPUTE_32I_PEDANTIC : HIPBLAS_COMPUTE_32F; // Default } // clang-format on hipBLAS-rocm-6.4.3/clients/common/hipblas_gentest.py000077500000000000000000000602311500474223100223650ustar00rootroot00000000000000#!/usr/bin/env python3 """Copyright (C) 2018-2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# Regex for type names in the YAML file. Optional *nnn indicates array.
# The array suffix is a non-capturing group "(?:...)". It was previously
# written "(:?...)", which is a capturing group starting with an optional
# literal colon, so strings such as "c_char:*32" were wrongly accepted.
# This pattern gates what is passed to eval(), so it should be exact.
TYPE_RE = re.compile(r'[a-z_A-Z]\w*(?:\s*\*\s*\d+)?$')

# Regex for integer ranges A..B[..C]
INT_RANGE_RE = re.compile(
    r'\s*(-?\d+)\s*\.\.\s*(-?\d+)\s*(?:\.\.\s*(-?\d+)\s*)?$')

# Regex for include: YAML extension
INCLUDE_RE = re.compile(r'include\s*:\s*(.*)')

# Module-level state shared by the functions below.
args = {}          # parsed command-line options (filled in by main)
testcases = set()  # binary records already written, used to drop duplicates
datatypes = {}     # datatype namespace of the current YAML document
param = {}         # per-document settings (Arguments type, known bugs, ...)
def read_yaml_file(file):
    """Read an open YAML file, splicing in files named by `include:` lines.

    Returns a list of [line_text, file_name, line_number] entries, one per
    line of YAML, so that parse errors can be reported against the original
    file and line. An included file is searched for first in the directory of
    the including file, then in each -I include path; the first existing path
    is read recursively. Exits with a diagnostic if no candidate exists.
    The file object passed in is closed before returning.
    """
    base_dir = os.path.dirname(file.name) or os.getcwd()
    records = []
    for lineno, text in enumerate(file, start=1):
        # Only lines beginning with "include" are tested against the regex
        found = INCLUDE_RE.match(text) if text.startswith('include') else None
        if not found:
            records.append([text, file.name, lineno])
            continue

        target = found.group(1)
        search_dirs = [base_dir] + args['includes']
        located = next((candidate
                        for candidate in (os.path.join(directory, target)
                                          for directory in search_dirs)
                        if os.path.exists(candidate)), None)
        if located is None:
            sys.exit("In file " + file.name + ", line " +
                     str(lineno) + ", column " +
                     str(found.start(1)+1) + ":\n" +
                     text.rstrip() + "\n" + " " * found.start(1) +
                     "^\nCannot open " + target +
                     "\n\nInclude paths:\n" + "\n".join(search_dirs))
        records.extend(read_yaml_file(open(located, 'r')))
    file.close()
    return records
def setdefaults(test):
    """Fill in dynamic default values for a single test case, in place.

    `test` is the dictionary of parameters for one fully expanded test. Only
    defaults that depend on other parameters (strides and leading dimensions)
    are computed here. Every value is applied through setkey_product() or
    dict.setdefault().

    Raises KeyError (reported by the caller) if a parameter needed to compute
    a default, such as 'function', 'transA', 'M' or 'batch_count', is missing.
    """
    # Do not put constant defaults here -- use hipblas_common.yaml for that.
    # These are only for dynamic defaults
    # TODO: This should be ideally moved to YAML file, with eval'd expressions.
    # TODO: move to use hipblas names and decide if we want any auto defaults or just yaml
    if test['function'] in ('asum_strided_batched', 'nrm2_strided_batched',
                            'scal_strided_batched', 'swap_strided_batched',
                            'copy_strided_batched', 'dot_strided_batched',
                            'dotc_strided_batched', 'dot_strided_batched_ex',
                            'dotc_strided_batched_ex', 'rot_strided_batched',
                            'rot_strided_batched_ex', 'rotm_strided_batched',
                            'iamax_strided_batched', 'iamin_strided_batched',
                            'axpy_strided_batched', 'axpy_strided_batched_ex',
                            'nrm2_strided_batched_ex', 'scal_strided_batched_ex'):
        setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale'])
        setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale'])
        # we are using stride_c for param in rotm
        # This used to iterate over ('stride_scale'), which is a plain string,
        # so it tested each character for membership and never fired.
        if 'stride_scale' in test:
            test.setdefault('stride_c', int(test['stride_scale']) * 5)

    # NOTE(review): this and the other single-name checks below compare
    # against a parenthesised string, not a 1-tuple, so `in` is a substring
    # test (e.g. 'tpmv' also matches). Left unchanged to keep the generated
    # data stable -- confirm whether exact matching was intended.
    elif test['function'] in ('tpmv_strided_batched'):
        setkey_product(test, 'stride_x', ['M', 'incx', 'stride_scale'])
        # Let's use M * M (> (M * (M+1)) / 2) as a 'stride' size for the packed format.
        setkey_product(test, 'stride_a', ['M', 'M', 'stride_scale'])

    elif test['function'] in ('trmv_strided_batched'):
        setkey_product(test, 'stride_x', ['M', 'incx', 'stride_scale'])
        setkey_product(test, 'stride_a', ['M', 'lda', 'stride_scale'])

    elif test['function'] in ('gemv_strided_batched', 'gbmv_strided_batched',
                              'ger_strided_batched', 'geru_strided_batched',
                              'gerc_strided_batched', 'trsv_strided_batched'):
        if test['function'] in ('ger_strided_batched', 'geru_strided_batched',
                                'gerc_strided_batched', 'trsv_strided_batched'
                                ) or test['transA'] in ('T', 'C'):
            setkey_product(test, 'stride_x', ['M', 'incx', 'stride_scale'])
            setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale'])
        else:
            setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale'])
            setkey_product(test, 'stride_y', ['M', 'incy', 'stride_scale'])
        if test['function'] in ('gbmv_strided_batched'):
            setkey_product(test, 'stride_a', ['lda', 'N', 'stride_scale'])

    elif test['function'] in ('hemv_strided_batched', 'hbmv_strided_batched'):
        if all([x in test for x in ('N', 'incx', 'incy', 'stride_scale')]):
            setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale'])
            setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale'])
            setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale'])

    elif test['function'] in ('hpmv_strided_batched'):
        if all([x in test for x in ('N', 'incx', 'incy', 'stride_scale')]):
            setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale'])
            setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale'])
            # Packed storage: N * (N + 1) / 2 elements per matrix
            ldN = int((test['N'] * (test['N'] + 1) * test['stride_scale']) / 2)
            test.setdefault('stride_a', ldN)

    elif test['function'] in ('spr_strided_batched', 'spr2_strided_batched',
                              'hpr_strided_batched', 'hpr2_strided_batched',
                              'tpsv_strided_batched'):
        setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale'])
        setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale'])
        setkey_product(test, 'stride_a', ['N', 'N', 'stride_scale'])

    elif test['function'] in ('her_strided_batched', 'her2_strided_batched',
                              'syr2_strided_batched'):
        setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale'])
        setkey_product(test, 'stride_y', ['N', 'incy', 'stride_scale'])
        setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale'])

    elif test['function'] in ('rotg_strided_batched'):
        # we are using stride_c for arg c and stride_d for arg s in rotg
        # these are single values for each batch
        if 'stride_scale' in test:
            test.setdefault('stride_a', int(test['stride_scale']))
            test.setdefault('stride_b', int(test['stride_scale']))
            test.setdefault('stride_c', int(test['stride_scale']))
            test.setdefault('stride_d', int(test['stride_scale']))

    elif test['function'] in ('rotmg_strided_batched'):
        # we are using stride_a for d1, stride_b for d2, and stride_c for param in
        # rotmg. These are single values for each batch, except param which is
        # a 5 element array
        if 'stride_scale' in test:
            test.setdefault('stride_a', int(test['stride_scale']))
            test.setdefault('stride_b', int(test['stride_scale']))
            test.setdefault('stride_c', int(test['stride_scale']) * 5)
            test.setdefault('stride_x', int(test['stride_scale']))
            test.setdefault('stride_y', int(test['stride_scale']))

    elif test['function'] in ('dgmm_strided_batched'):
        setkey_product(test, 'stride_c', ['N', 'ldc', 'stride_scale'])
        setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale'])
        if test['side'].upper() == 'L':
            setkey_product(test, 'stride_x', ['M', 'incx', 'stride_scale'])
        else:
            setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale'])

    elif test['function'] in ('geam_strided_batched'):
        setkey_product(test, 'stride_c', ['N', 'ldc', 'stride_scale'])

        if test['transA'].upper() == 'N':
            setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale'])
        else:
            setkey_product(test, 'stride_a', ['M', 'lda', 'stride_scale'])

        if test['transB'].upper() == 'N':
            setkey_product(test, 'stride_b', ['N', 'ldb', 'stride_scale'])
        else:
            setkey_product(test, 'stride_b', ['M', 'ldb', 'stride_scale'])

    elif test['function'] in ('trmm_strided_batched'):
        setkey_product(test, 'stride_b', ['N', 'ldb', 'stride_scale'])

        if test['side'].upper() == 'L':
            setkey_product(test, 'stride_a', ['M', 'lda', 'stride_scale'])
        else:
            setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale'])

    elif test['function'] in ('trsm_strided_batched',
                              'trsm_strided_batched_ex'):
        setkey_product(test, 'stride_b', ['N', 'ldb', 'stride_scale'])

        if test['side'].upper() == 'L':
            setkey_product(test, 'stride_a', ['M', 'lda', 'stride_scale'])
        else:
            setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale'])

    elif test['function'] in ('tbmv_strided_batched'):
        if all([x in test for x in ('M', 'lda', 'stride_scale')]):
            ldM = int(test['M'] * test['lda'] * test['stride_scale'])
            test.setdefault('stride_a', ldM)
        if all([x in test for x in ('M', 'incx', 'stride_scale')]):
            ldx = int(test['M'] * abs(test['incx']) * test['stride_scale'])
            test.setdefault('stride_x', ldx)

    elif test['function'] in ('tbsv_strided_batched'):
        setkey_product(test, 'stride_a', ['N', 'lda', 'stride_scale'])
        setkey_product(test, 'stride_x', ['N', 'incx', 'stride_scale'])

    # Vector strides fall back to 0 when nothing above supplied them
    test.setdefault('stride_x', 0)
    test.setdefault('stride_y', 0)

    if test['transA'] == '*' or test['transB'] == '*':
        # Wildcard transpose: no meaningful leading dimension can be derived
        test.setdefault('lda', 0)
        test.setdefault('ldb', 0)
        test.setdefault('ldc', 0)
        test.setdefault('ldd', 0)
    else:
        test.setdefault('lda', test['M'] if test['transA'].upper() == 'N' else
                        test['K'] if test['K'] != 0 else 1)
        # NOTE(review): this parses as
        #   K if K != 0 else (1 if transB == 'N' else N)
        # which may not be the intended grouping -- confirm before changing.
        test.setdefault('ldb', test['K'] if test['K'] != 0 else 1
                        if test['transB'].upper() == 'N' else test['N'])
        test.setdefault('ldc', test['M'])
        test.setdefault('ldd', test['M'])
        if test['batch_count'] > 0:
            test.setdefault('stride_a', test['lda'] *
                            (test['K'] if test['transA'].upper() == 'N' else
                             test['M']))
            test.setdefault('stride_b', test['ldb'] *
                            (test['N'] if test['transB'].upper() == 'N' else
                             test['K']))
            test.setdefault('stride_c', test['ldc'] * test['N'])
            test.setdefault('stride_d', test['ldd'] * test['N'])
            return

    # Matrix strides fall back to 0 for wildcard or non-batched cases
    test.setdefault('stride_a', 0)
    test.setdefault('stride_b', 0)
    test.setdefault('stride_c', 0)
    test.setdefault('stride_d', 0)
def generate(test, function):
    """Generate test combinations by iterating across lists recursively

    test     -- dictionary of test parameters; it is copied on entry, so the
                caller's dictionary is not modified by this call.
    function -- callable invoked with each fully expanded test dictionary.

    Each call performs at most one kind of expansion (dictionary list,
    integer range, sequence, or typed function name), recurses once per
    resulting value, and returns. `function` is only called once nothing is
    left to expand.
    """
    test = test.copy()

    # For specially named lists, they are expanded and merged into the test
    # argument list. When the list name is a dictionary of length 1, its pairs
    # indicate that the argument named by its key takes on values paired with
    # the argument named by its value, which is another dictionary list. We
    # process the value dictionaries' keys in alphabetic order, to ensure
    # deterministic test ordering.
    for argname in param['dict_lists_to_expand']:
        if type(argname) == dict:
            if len(argname) == 1:
                arg, target = list(argname.items())[0]
                if arg in test and type(test[arg]) == dict:
                    pairs = sorted(list(test[arg].items()), key=lambda x: x[0])
                    # The loop targets write each (key, value) pair straight
                    # into test[arg] and test[target] before recursing.
                    for test[arg], test[target] in pairs:
                        generate(test, function)
                    return
        elif argname in test and type(test[argname]) in (tuple, list, dict):
            # Pop the list and iterate across it
            ilist = test.pop(argname)

            # For a bare dictionary, wrap it in a list and apply it once
            for item in [ilist] if type(ilist) == dict else ilist:
                try:
                    case = test.copy()
                    case.update(item)  # original test merged with each item
                    generate(case, function)
                except TypeError as err:
                    sys.exit("TypeError: " + str(err) + " for " + argname +
                             ", which has type " + str(type(item)) +
                             "\nA name listed in \"Dictionary lists to "
                             "expand\" must be a defined as a dictionary.\n")
            return

    # Keys are visited in sorted order so expansion order is deterministic
    for key in sorted(list(test)):
        # Integer arguments which are ranges (A..B[..C]) are expanded
        if type(test[key]) == str:
            match = INT_RANGE_RE.match(str(test[key]))
            if match:
                # B is inclusive, hence the +1; the step C defaults to 1
                for test[key] in range(int(match.group(1)),
                                       int(match.group(2))+1,
                                       int(match.group(3) or 1)):
                    generate(test, function)
                return

        # For sequence arguments, they are expanded into scalars
        elif (type(test[key]) in (tuple, list) and
              key not in param['lists_to_not_expand']):
            # The iterable is evaluated once, then each element overwrites
            # test[key] in turn.
            for test[key] in test[key]:
                generate(test, function)
            return

    # Replace typed function names with generic functions and types
    if 'hipblas_function' in test:
        func = test.pop('hipblas_function')
        if func in param['Functions']:
            test.update(param['Functions'][func])
        else:
            # No entry in Functions: use the text after the last 'hipblas_'
            test['function'] = func.rpartition('hipblas_')[2]
        generate(test, function)
        return

    # Nothing left to expand: hand the concrete test case to the callback
    function(test)
All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * ************************************************************************ */ #include "hipblas_parse_data.hpp" #include "hipblas_data.hpp" #include "utility.h" #include #include #include #include #include #include #include // Parse YAML data static std::string hipblas_parse_yaml(const std::string& yaml) { std::string tmp = hipblas_tempname(); auto exepath = hipblas_exepath(); #ifdef HIPBLAS_V2 auto cmd = exepath + "hipblas_gentest.py --hipblas_v2 --template " + exepath + "hipblas_template.yaml -o " + tmp + " " + yaml; #else auto cmd = exepath + "hipblas_gentest.py --template " + exepath + "hipblas_template.yaml -o " + tmp + " " + yaml; #endif std::cerr << cmd << std::endl; #ifdef WIN32 int status = std::system(cmd.c_str()); if(status == -1) exit(EXIT_FAILURE); #else int status = system(cmd.c_str()); if(status == -1 || !WIFEXITED(status) || WEXITSTATUS(status)) exit(EXIT_FAILURE); #endif return tmp; } // Parse --data and --yaml command-line arguments bool hipblas_parse_data(int& argc, char** argv, const std::string& default_file) { std::string filename; char** argv_p = argv + 1; bool help = false, yaml = false; // Scan, process and remove any --yaml or --data options for(int i = 1; argv[i]; ++i) { if(!strcmp(argv[i], "--data") || !strcmp(argv[i], "--yaml")) { if(!strcmp(argv[i], "--yaml")) { yaml = true; } if(filename != "") { std::cerr << "Only one of the --yaml and --data options may be specified" << std::endl; exit(EXIT_FAILURE); } if(!argv[i + 1] || !argv[i + 1][0]) { std::cerr << "The " << argv[i] << " option requires an argument" << std::endl; exit(EXIT_FAILURE); } filename = argv[++i]; } else { *argv_p++ = argv[i]; if(!help && (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help"))) { help = true; std::cout << "\n" << argv[0] << " [ --data | --yaml ] ...\n" << std::endl; } } } // argc and argv contain remaining options and non-option arguments *argv_p = nullptr; argc = argv_p - argv; if(filename == "-") filename = "/dev/stdin"; else if(filename == "") 
filename = default_file; if(yaml) filename = hipblas_parse_yaml(filename); if(filename != "") { HipBLAS_TestData::set_filename(filename, yaml); return true; } return false; } hipBLAS-rocm-6.4.3/clients/common/hipblas_template_specialization.cpp000066400000000000000000027232061500474223100257660ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * * ************************************************************************/ #include "hipblas.h" #include "hipblas.hpp" #ifndef WIN32 #include "hipblas_fortran.hpp" #else #include "hipblas_no_fortran.hpp" #endif #include // This file's purpose is now only for casting hipblasComplex -> hipComplex when necessary. // When we finish transitioning to hipComplex, this file can be deleted. 
/*
 * ===========================================================================
 *    level 1 BLAS
 * ===========================================================================
 */

#ifdef HIPBLAS_V2

// Every *Cast wrapper in this section forwards its arguments unchanged to the hipBLAS
// routine of the same base name. The only work done here is reinterpreting
// hipblasComplex / hipblasDoubleComplex pointers as hipComplex / hipDoubleComplex pointers.
// NOTE(review): this presumes the two complex families are layout-compatible; that is not
// visible from this part of the file.

// axpy
hipblasStatus_t hipblasCaxpyCast(
    hipblasHandle_t handle, int n, const hipblasComplex* alpha,
    const hipblasComplex* x, int incx,
    hipblasComplex* y, int incy)
{
    return hipblasCaxpy(handle, n, reinterpret_cast<const hipComplex*>(alpha),
        reinterpret_cast<const hipComplex*>(x), incx,
        reinterpret_cast<hipComplex*>(y), incy);
}

hipblasStatus_t hipblasZaxpyCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* x, int incx,
    hipblasDoubleComplex* y, int incy)
{
    return hipblasZaxpy(handle, n, reinterpret_cast<const hipDoubleComplex*>(alpha),
        reinterpret_cast<const hipDoubleComplex*>(x), incx,
        reinterpret_cast<hipDoubleComplex*>(y), incy);
}

hipblasStatus_t hipblasCaxpyCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* alpha,
    const hipblasComplex* x, int64_t incx,
    hipblasComplex* y, int64_t incy)
{
    return hipblasCaxpy_64(handle, n, reinterpret_cast<const hipComplex*>(alpha),
        reinterpret_cast<const hipComplex*>(x), incx,
        reinterpret_cast<hipComplex*>(y), incy);
}

hipblasStatus_t hipblasZaxpyCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* x, int64_t incx,
    hipblasDoubleComplex* y, int64_t incy)
{
    return hipblasZaxpy_64(handle, n, reinterpret_cast<const hipDoubleComplex*>(alpha),
        reinterpret_cast<const hipDoubleComplex*>(x), incx,
        reinterpret_cast<hipDoubleComplex*>(y), incy);
}

// axpy_batched
hipblasStatus_t hipblasCaxpyBatchedCast(
    hipblasHandle_t handle, int n, const hipblasComplex* alpha,
    const hipblasComplex* const x[], int incx,
    hipblasComplex* const y[], int incy,
    int batch_count)
{
    return hipblasCaxpyBatched(handle, n, reinterpret_cast<const hipComplex*>(alpha),
        reinterpret_cast<const hipComplex* const*>(x), incx,
        reinterpret_cast<hipComplex* const*>(y), incy,
        batch_count);
}

hipblasStatus_t hipblasZaxpyBatchedCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* const x[], int incx,
    hipblasDoubleComplex* const y[], int incy,
    int batch_count)
{
    return hipblasZaxpyBatched(handle, n, reinterpret_cast<const hipDoubleComplex*>(alpha),
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<hipDoubleComplex* const*>(y), incy,
        batch_count);
}

hipblasStatus_t hipblasCaxpyBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* alpha,
    const hipblasComplex* const x[], int64_t incx,
    hipblasComplex* const y[], int64_t incy,
    int64_t batch_count)
{
    return hipblasCaxpyBatched_64(handle, n, reinterpret_cast<const hipComplex*>(alpha),
        reinterpret_cast<const hipComplex* const*>(x), incx,
        reinterpret_cast<hipComplex* const*>(y), incy,
        batch_count);
}

hipblasStatus_t hipblasZaxpyBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* const x[], int64_t incx,
    hipblasDoubleComplex* const y[], int64_t incy,
    int64_t batch_count)
{
    return hipblasZaxpyBatched_64(handle, n, reinterpret_cast<const hipDoubleComplex*>(alpha),
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<hipDoubleComplex* const*>(y), incy,
        batch_count);
}

// axpy_strided_batched
hipblasStatus_t hipblasCaxpyStridedBatchedCast(
    hipblasHandle_t handle, int n, const hipblasComplex* alpha,
    const hipblasComplex* x, int incx, hipblasStride stridex,
    hipblasComplex* y, int incy, hipblasStride stridey,
    int batch_count)
{
    return hipblasCaxpyStridedBatched(handle, n, reinterpret_cast<const hipComplex*>(alpha),
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        reinterpret_cast<hipComplex*>(y), incy, stridey,
        batch_count);
}

hipblasStatus_t hipblasZaxpyStridedBatchedCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    hipblasDoubleComplex* y, int incy, hipblasStride stridey,
    int batch_count)
{
    return hipblasZaxpyStridedBatched(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(alpha),
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        reinterpret_cast<hipDoubleComplex*>(y), incy, stridey,
        batch_count);
}

hipblasStatus_t hipblasCaxpyStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* alpha,
    const hipblasComplex* x, int64_t incx, hipblasStride stridex,
    hipblasComplex* y, int64_t incy, hipblasStride stridey,
    int64_t batch_count)
{
    return hipblasCaxpyStridedBatched_64(handle, n, reinterpret_cast<const hipComplex*>(alpha),
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        reinterpret_cast<hipComplex*>(y), incy, stridey,
        batch_count);
}

hipblasStatus_t hipblasZaxpyStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey,
    int64_t batch_count)
{
    return hipblasZaxpyStridedBatched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(alpha),
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        reinterpret_cast<hipDoubleComplex*>(y), incy, stridey,
        batch_count);
}

// swap
hipblasStatus_t hipblasCswapCast(
    hipblasHandle_t handle, int n,
    hipblasComplex* x, int incx,
    hipblasComplex* y, int incy)
{
    return hipblasCswap(handle, n,
        reinterpret_cast<hipComplex*>(x), incx,
        reinterpret_cast<hipComplex*>(y), incy);
}

hipblasStatus_t hipblasZswapCast(
    hipblasHandle_t handle, int n,
    hipblasDoubleComplex* x, int incx,
    hipblasDoubleComplex* y, int incy)
{
    return hipblasZswap(handle, n,
        reinterpret_cast<hipDoubleComplex*>(x), incx,
        reinterpret_cast<hipDoubleComplex*>(y), incy);
}

// swap_64
hipblasStatus_t hipblasCswapCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasComplex* x, int64_t incx,
    hipblasComplex* y, int64_t incy)
{
    return hipblasCswap_64(handle, n,
        reinterpret_cast<hipComplex*>(x), incx,
        reinterpret_cast<hipComplex*>(y), incy);
}

hipblasStatus_t hipblasZswapCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasDoubleComplex* x, int64_t incx,
    hipblasDoubleComplex* y, int64_t incy)
{
    return hipblasZswap_64(handle, n,
        reinterpret_cast<hipDoubleComplex*>(x), incx,
        reinterpret_cast<hipDoubleComplex*>(y), incy);
}

// swap_batched
hipblasStatus_t hipblasCswapBatchedCast(
    hipblasHandle_t handle, int n,
    hipblasComplex* const x[], int incx,
    hipblasComplex* const y[], int incy,
    int batch_count)
{
    return hipblasCswapBatched(handle, n,
        reinterpret_cast<hipComplex* const*>(x), incx,
        reinterpret_cast<hipComplex* const*>(y), incy,
        batch_count);
}

hipblasStatus_t hipblasZswapBatchedCast(
    hipblasHandle_t handle, int n,
    hipblasDoubleComplex* const x[], int incx,
    hipblasDoubleComplex* const y[], int incy,
    int batch_count)
{
    return hipblasZswapBatched(handle, n,
        reinterpret_cast<hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<hipDoubleComplex* const*>(y), incy,
        batch_count);
}

// swap_batched_64
hipblasStatus_t hipblasCswapBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasComplex* const x[], int64_t incx,
    hipblasComplex* const y[], int64_t incy,
    int64_t batch_count)
{
    return hipblasCswapBatched_64(handle, n,
        reinterpret_cast<hipComplex* const*>(x), incx,
        reinterpret_cast<hipComplex* const*>(y), incy,
        batch_count);
}

hipblasStatus_t hipblasZswapBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasDoubleComplex* const x[], int64_t incx,
    hipblasDoubleComplex* const y[], int64_t incy,
    int64_t batch_count)
{
    return hipblasZswapBatched_64(handle, n,
        reinterpret_cast<hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<hipDoubleComplex* const*>(y), incy,
        batch_count);
}

// swap_strided_batched
hipblasStatus_t hipblasCswapStridedBatchedCast(
    hipblasHandle_t handle, int n,
    hipblasComplex* x, int incx, hipblasStride stridex,
    hipblasComplex* y, int incy, hipblasStride stridey,
    int batch_count)
{
    return hipblasCswapStridedBatched(handle, n,
        reinterpret_cast<hipComplex*>(x), incx, stridex,
        reinterpret_cast<hipComplex*>(y), incy, stridey,
        batch_count);
}

hipblasStatus_t hipblasZswapStridedBatchedCast(
    hipblasHandle_t handle, int n,
    hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    hipblasDoubleComplex* y, int incy, hipblasStride stridey,
    int batch_count)
{
    return hipblasZswapStridedBatched(handle, n,
        reinterpret_cast<hipDoubleComplex*>(x), incx, stridex,
        reinterpret_cast<hipDoubleComplex*>(y), incy, stridey,
        batch_count);
}

// swap_strided_batched_64
hipblasStatus_t hipblasCswapStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasComplex* x, int64_t incx, hipblasStride stridex,
    hipblasComplex* y, int64_t incy, hipblasStride stridey,
    int64_t batch_count)
{
    return hipblasCswapStridedBatched_64(handle, n,
        reinterpret_cast<hipComplex*>(x), incx, stridex,
        reinterpret_cast<hipComplex*>(y), incy, stridey,
        batch_count);
}

hipblasStatus_t hipblasZswapStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey,
    int64_t batch_count)
{
    return hipblasZswapStridedBatched_64(handle, n,
        reinterpret_cast<hipDoubleComplex*>(x), incx, stridex,
        reinterpret_cast<hipDoubleComplex*>(y), incy, stridey,
        batch_count);
}

// copy
hipblasStatus_t hipblasCcopyCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* x, int incx,
    hipblasComplex* y, int incy)
{
    return hipblasCcopy(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx,
        reinterpret_cast<hipComplex*>(y), incy);
}

hipblasStatus_t hipblasZcopyCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* x, int incx,
    hipblasDoubleComplex* y, int incy)
{
    return hipblasZcopy(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx,
        reinterpret_cast<hipDoubleComplex*>(y), incy);
}

hipblasStatus_t hipblasCcopyCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* x, int64_t incx,
    hipblasComplex* y, int64_t incy)
{
    return hipblasCcopy_64(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx,
        reinterpret_cast<hipComplex*>(y), incy);
}

hipblasStatus_t hipblasZcopyCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* x, int64_t incx,
    hipblasDoubleComplex* y, int64_t incy)
{
    return hipblasZcopy_64(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx,
        reinterpret_cast<hipDoubleComplex*>(y), incy);
}

// copy_batched
hipblasStatus_t hipblasCcopyBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* const x[], int incx,
    hipblasComplex* const y[], int incy,
    int batch_count)
{
    return hipblasCcopyBatched(handle, n,
        reinterpret_cast<const hipComplex* const*>(x), incx,
        reinterpret_cast<hipComplex* const*>(y), incy,
        batch_count);
}

hipblasStatus_t hipblasZcopyBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* const x[], int incx,
    hipblasDoubleComplex* const y[], int incy,
    int batch_count)
{
    return hipblasZcopyBatched(handle, n,
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<hipDoubleComplex* const*>(y), incy,
        batch_count);
}

hipblasStatus_t hipblasCcopyBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* const x[], int64_t incx,
    hipblasComplex* const y[], int64_t incy,
    int64_t batch_count)
{
    return hipblasCcopyBatched_64(handle, n,
        reinterpret_cast<const hipComplex* const*>(x), incx,
        reinterpret_cast<hipComplex* const*>(y), incy,
        batch_count);
}

hipblasStatus_t hipblasZcopyBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* const x[], int64_t incx,
    hipblasDoubleComplex* const y[], int64_t incy,
    int64_t batch_count)
{
    return hipblasZcopyBatched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<hipDoubleComplex* const*>(y), incy,
        batch_count);
}

// copy_strided_batched
hipblasStatus_t hipblasCcopyStridedBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* x, int incx, hipblasStride stridex,
    hipblasComplex* y, int incy, hipblasStride stridey,
    int batch_count)
{
    return hipblasCcopyStridedBatched(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        reinterpret_cast<hipComplex*>(y), incy, stridey,
        batch_count);
}

hipblasStatus_t hipblasZcopyStridedBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    hipblasDoubleComplex* y, int incy, hipblasStride stridey,
    int batch_count)
{
    return hipblasZcopyStridedBatched(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        reinterpret_cast<hipDoubleComplex*>(y), incy, stridey,
        batch_count);
}

hipblasStatus_t hipblasCcopyStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* x, int64_t incx, hipblasStride stridex,
    hipblasComplex* y, int64_t incy, hipblasStride stridey,
    int64_t batch_count)
{
    return hipblasCcopyStridedBatched_64(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        reinterpret_cast<hipComplex*>(y), incy, stridey,
        batch_count);
}

hipblasStatus_t hipblasZcopyStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey,
    int64_t batch_count)
{
    return hipblasZcopyStridedBatched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        reinterpret_cast<hipDoubleComplex*>(y), incy, stridey,
        batch_count);
}

// dot
hipblasStatus_t hipblasCdotuCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* x, int incx,
    const hipblasComplex* y, int incy,
    hipblasComplex* result)
{
    return hipblasCdotu(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx,
        reinterpret_cast<const hipComplex*>(y), incy,
        reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasZdotuCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* x, int incx,
    const hipblasDoubleComplex* y, int incy,
    hipblasDoubleComplex* result)
{
    return hipblasZdotu(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx,
        reinterpret_cast<const hipDoubleComplex*>(y), incy,
        reinterpret_cast<hipDoubleComplex*>(result));
}

hipblasStatus_t hipblasCdotcCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* x, int incx,
    const hipblasComplex* y, int incy,
    hipblasComplex* result)
{
    return hipblasCdotc(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx,
        reinterpret_cast<const hipComplex*>(y), incy,
        reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasZdotcCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* x, int incx,
    const hipblasDoubleComplex* y, int incy,
    hipblasDoubleComplex* result)
{
    return hipblasZdotc(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx,
        reinterpret_cast<const hipDoubleComplex*>(y), incy,
        reinterpret_cast<hipDoubleComplex*>(result));
}

hipblasStatus_t hipblasCdotuCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* x, int64_t incx,
    const hipblasComplex* y, int64_t incy,
    hipblasComplex* result)
{
    return hipblasCdotu_64(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx,
        reinterpret_cast<const hipComplex*>(y), incy,
        reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasZdotuCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* x, int64_t incx,
    const hipblasDoubleComplex* y, int64_t incy,
    hipblasDoubleComplex* result)
{
    return hipblasZdotu_64(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx,
        reinterpret_cast<const hipDoubleComplex*>(y), incy,
        reinterpret_cast<hipDoubleComplex*>(result));
}

hipblasStatus_t hipblasCdotcCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* x, int64_t incx,
    const hipblasComplex* y, int64_t incy,
    hipblasComplex* result)
{
    return hipblasCdotc_64(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx,
        reinterpret_cast<const hipComplex*>(y), incy,
        reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasZdotcCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* x, int64_t incx,
    const hipblasDoubleComplex* y, int64_t incy,
    hipblasDoubleComplex* result)
{
    return hipblasZdotc_64(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx,
        reinterpret_cast<const hipDoubleComplex*>(y), incy,
        reinterpret_cast<hipDoubleComplex*>(result));
}

// dot_batched
hipblasStatus_t hipblasCdotuBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* const x[], int incx,
    const hipblasComplex* const y[], int incy,
    int batch_count, hipblasComplex* result)
{
    return hipblasCdotuBatched(handle, n,
        reinterpret_cast<const hipComplex* const*>(x), incx,
        reinterpret_cast<const hipComplex* const*>(y), incy,
        batch_count, reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasCdotcBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* const x[], int incx,
    const hipblasComplex* const y[], int incy,
    int batch_count, hipblasComplex* result)
{
    return hipblasCdotcBatched(handle, n,
        reinterpret_cast<const hipComplex* const*>(x), incx,
        reinterpret_cast<const hipComplex* const*>(y), incy,
        batch_count, reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasZdotuBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* const x[], int incx,
    const hipblasDoubleComplex* const y[], int incy,
    int batch_count, hipblasDoubleComplex* result)
{
    return hipblasZdotuBatched(handle, n,
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<const hipDoubleComplex* const*>(y), incy,
        batch_count, reinterpret_cast<hipDoubleComplex*>(result));
}

hipblasStatus_t hipblasZdotcBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* const x[], int incx,
    const hipblasDoubleComplex* const y[], int incy,
    int batch_count, hipblasDoubleComplex* result)
{
    return hipblasZdotcBatched(handle, n,
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<const hipDoubleComplex* const*>(y), incy,
        batch_count, reinterpret_cast<hipDoubleComplex*>(result));
}

hipblasStatus_t hipblasCdotuBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* const x[], int64_t incx,
    const hipblasComplex* const y[], int64_t incy,
    int64_t batch_count, hipblasComplex* result)
{
    return hipblasCdotuBatched_64(handle, n,
        reinterpret_cast<const hipComplex* const*>(x), incx,
        reinterpret_cast<const hipComplex* const*>(y), incy,
        batch_count, reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasCdotcBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* const x[], int64_t incx,
    const hipblasComplex* const y[], int64_t incy,
    int64_t batch_count, hipblasComplex* result)
{
    return hipblasCdotcBatched_64(handle, n,
        reinterpret_cast<const hipComplex* const*>(x), incx,
        reinterpret_cast<const hipComplex* const*>(y), incy,
        batch_count, reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasZdotuBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* const x[], int64_t incx,
    const hipblasDoubleComplex* const y[], int64_t incy,
    int64_t batch_count, hipblasDoubleComplex* result)
{
    return hipblasZdotuBatched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<const hipDoubleComplex* const*>(y), incy,
        batch_count, reinterpret_cast<hipDoubleComplex*>(result));
}

hipblasStatus_t hipblasZdotcBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* const x[], int64_t incx,
    const hipblasDoubleComplex* const y[], int64_t incy,
    int64_t batch_count, hipblasDoubleComplex* result)
{
    return hipblasZdotcBatched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<const hipDoubleComplex* const*>(y), incy,
        batch_count, reinterpret_cast<hipDoubleComplex*>(result));
}

// dot_strided_batched
hipblasStatus_t hipblasCdotuStridedBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* x, int incx, hipblasStride stridex,
    const hipblasComplex* y, int incy, hipblasStride stridey,
    int batch_count, hipblasComplex* result)
{
    return hipblasCdotuStridedBatched(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        reinterpret_cast<const hipComplex*>(y), incy, stridey,
        batch_count, reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasCdotcStridedBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* x, int incx, hipblasStride stridex,
    const hipblasComplex* y, int incy, hipblasStride stridey,
    int batch_count, hipblasComplex* result)
{
    return hipblasCdotcStridedBatched(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        reinterpret_cast<const hipComplex*>(y), incy, stridey,
        batch_count, reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasZdotuStridedBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    const hipblasDoubleComplex* y, int incy, hipblasStride stridey,
    int batch_count, hipblasDoubleComplex* result)
{
    return hipblasZdotuStridedBatched(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        reinterpret_cast<const hipDoubleComplex*>(y), incy, stridey,
        batch_count, reinterpret_cast<hipDoubleComplex*>(result));
}

hipblasStatus_t hipblasZdotcStridedBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    const hipblasDoubleComplex* y, int incy, hipblasStride stridey,
    int batch_count, hipblasDoubleComplex* result)
{
    return hipblasZdotcStridedBatched(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        reinterpret_cast<const hipDoubleComplex*>(y), incy, stridey,
        batch_count, reinterpret_cast<hipDoubleComplex*>(result));
}

hipblasStatus_t hipblasCdotuStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* x, int64_t incx, hipblasStride stridex,
    const hipblasComplex* y, int64_t incy, hipblasStride stridey,
    int64_t batch_count, hipblasComplex* result)
{
    return hipblasCdotuStridedBatched_64(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        reinterpret_cast<const hipComplex*>(y), incy, stridey,
        batch_count, reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasCdotcStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* x, int64_t incx, hipblasStride stridex,
    const hipblasComplex* y, int64_t incy, hipblasStride stridey,
    int64_t batch_count, hipblasComplex* result)
{
    return hipblasCdotcStridedBatched_64(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        reinterpret_cast<const hipComplex*>(y), incy, stridey,
        batch_count, reinterpret_cast<hipComplex*>(result));
}

hipblasStatus_t hipblasZdotuStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    const hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey,
    int64_t batch_count, hipblasDoubleComplex* result)
{
    return hipblasZdotuStridedBatched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        reinterpret_cast<const hipDoubleComplex*>(y), incy, stridey,
        batch_count, reinterpret_cast<hipDoubleComplex*>(result));
}

hipblasStatus_t hipblasZdotcStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    const hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey,
    int64_t batch_count, hipblasDoubleComplex* result)
{
    return hipblasZdotcStridedBatched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        reinterpret_cast<const hipDoubleComplex*>(y), incy, stridey,
        batch_count, reinterpret_cast<hipDoubleComplex*>(result));
}

// asum
hipblasStatus_t hipblasScasumCast(
    hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result)
{
    return hipblasScasum(handle, n, reinterpret_cast<const hipComplex*>(x), incx, result);
}

hipblasStatus_t hipblasDzasumCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result)
{
    return hipblasDzasum(handle, n, reinterpret_cast<const hipDoubleComplex*>(x), incx, result);
}

hipblasStatus_t hipblasScasumCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* x, int64_t incx, float* result)
{
    return hipblasScasum_64(handle, n, reinterpret_cast<const hipComplex*>(x), incx, result);
}

hipblasStatus_t hipblasDzasumCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* x, int64_t incx,
    double* result)
{
    return hipblasDzasum_64(
        handle, n, reinterpret_cast<const hipDoubleComplex*>(x), incx, result);
}

// asum_batched
hipblasStatus_t hipblasScasumBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* const x[], int incx,
    int batch_count, float* result)
{
    return hipblasScasumBatched(handle, n,
        reinterpret_cast<const hipComplex* const*>(x), incx,
        batch_count, result);
}

hipblasStatus_t hipblasDzasumBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* const x[], int incx,
    int batch_count, double* result)
{
    return hipblasDzasumBatched(handle, n,
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        batch_count, result);
}

hipblasStatus_t hipblasScasumBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* const x[], int64_t incx,
    int64_t batch_count, float* result)
{
    return hipblasScasumBatched_64(handle, n,
        reinterpret_cast<const hipComplex* const*>(x), incx,
        batch_count, result);
}

hipblasStatus_t hipblasDzasumBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* const x[], int64_t incx,
    int64_t batch_count, double* result)
{
    return hipblasDzasumBatched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        batch_count, result);
}

// asum_strided_batched
hipblasStatus_t hipblasScasumStridedBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* x, int incx, hipblasStride stridex,
    int batch_count, float* result)
{
    return hipblasScasumStridedBatched(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        batch_count, result);
}

hipblasStatus_t hipblasDzasumStridedBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    int batch_count, double* result)
{
    return hipblasDzasumStridedBatched(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        batch_count, result);
}

hipblasStatus_t hipblasScasumStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* x, int64_t incx, hipblasStride stridex,
    int64_t batch_count, float* result)
{
    return hipblasScasumStridedBatched_64(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        batch_count, result);
}

hipblasStatus_t hipblasDzasumStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    int64_t batch_count, double* result)
{
    return hipblasDzasumStridedBatched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        batch_count, result);
}

// nrm2
hipblasStatus_t hipblasScnrm2Cast(
    hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, float* result)
{
    return hipblasScnrm2(handle, n, reinterpret_cast<const hipComplex*>(x), incx, result);
}

hipblasStatus_t hipblasDznrm2Cast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, double* result)
{
    return hipblasDznrm2(handle, n, reinterpret_cast<const hipDoubleComplex*>(x), incx, result);
}

hipblasStatus_t hipblasScnrm2Cast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* x, int64_t incx, float* result)
{
    return hipblasScnrm2_64(handle, n, reinterpret_cast<const hipComplex*>(x), incx, result);
}

hipblasStatus_t hipblasDznrm2Cast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* x, int64_t incx,
    double* result)
{
    return hipblasDznrm2_64(
        handle, n, reinterpret_cast<const hipDoubleComplex*>(x), incx, result);
}

// nrm2_batched
hipblasStatus_t hipblasScnrm2BatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* const x[], int incx,
    int batch_count, float* result)
{
    return hipblasScnrm2Batched(handle, n,
        reinterpret_cast<const hipComplex* const*>(x), incx,
        batch_count, result);
}

hipblasStatus_t hipblasDznrm2BatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* const x[], int incx,
    int batch_count, double* result)
{
    return hipblasDznrm2Batched(handle, n,
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        batch_count, result);
}

hipblasStatus_t hipblasScnrm2BatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* const x[], int64_t incx,
    int64_t batch_count, float* result)
{
    return hipblasScnrm2Batched_64(handle, n,
        reinterpret_cast<const hipComplex* const*>(x), incx,
        batch_count, result);
}

hipblasStatus_t hipblasDznrm2BatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* const x[], int64_t incx,
    int64_t batch_count, double* result)
{
    return hipblasDznrm2Batched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex* const*>(x), incx,
        batch_count, result);
}

// nrm2_strided_batched
hipblasStatus_t hipblasScnrm2StridedBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasComplex* x, int incx, hipblasStride stridex,
    int batch_count, float* result)
{
    return hipblasScnrm2StridedBatched(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        batch_count, result);
}

hipblasStatus_t hipblasDznrm2StridedBatchedCast(
    hipblasHandle_t handle, int n,
    const hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    int batch_count, double* result)
{
    return hipblasDznrm2StridedBatched(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        batch_count, result);
}

hipblasStatus_t hipblasScnrm2StridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasComplex* x, int64_t incx, hipblasStride stridex,
    int64_t batch_count, float* result)
{
    return hipblasScnrm2StridedBatched_64(handle, n,
        reinterpret_cast<const hipComplex*>(x), incx, stridex,
        batch_count, result);
}

hipblasStatus_t hipblasDznrm2StridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    int64_t batch_count, double* result)
{
    return hipblasDznrm2StridedBatched_64(handle, n,
        reinterpret_cast<const hipDoubleComplex*>(x), incx, stridex,
        batch_count, result);
}

// rot
// c is always passed through untouched; s is reinterpreted only in the variants where it
// is a complex pointer (Crot/Zrot), and passed through as-is for Csrot/Zdrot.
hipblasStatus_t hipblasCrotCast(
    hipblasHandle_t handle, int n,
    hipblasComplex* x, int incx,
    hipblasComplex* y, int incy,
    const float* c, const hipblasComplex* s)
{
    return hipblasCrot(handle, n,
        reinterpret_cast<hipComplex*>(x), incx,
        reinterpret_cast<hipComplex*>(y), incy,
        c, reinterpret_cast<const hipComplex*>(s));
}

hipblasStatus_t hipblasCsrotCast(
    hipblasHandle_t handle, int n,
    hipblasComplex* x, int incx,
    hipblasComplex* y, int incy,
    const float* c, const float* s)
{
    return hipblasCsrot(handle, n,
        reinterpret_cast<hipComplex*>(x), incx,
        reinterpret_cast<hipComplex*>(y), incy,
        c, s);
}

hipblasStatus_t hipblasZrotCast(
    hipblasHandle_t handle, int n,
    hipblasDoubleComplex* x, int incx,
    hipblasDoubleComplex* y, int incy,
    const double* c, const hipblasDoubleComplex* s)
{
    return hipblasZrot(handle, n,
        reinterpret_cast<hipDoubleComplex*>(x), incx,
        reinterpret_cast<hipDoubleComplex*>(y), incy,
        c, reinterpret_cast<const hipDoubleComplex*>(s));
}

hipblasStatus_t hipblasZdrotCast(
    hipblasHandle_t handle, int n,
    hipblasDoubleComplex* x, int incx,
    hipblasDoubleComplex* y, int incy,
    const double* c, const double* s)
{
    return hipblasZdrot(handle, n,
        reinterpret_cast<hipDoubleComplex*>(x), incx,
        reinterpret_cast<hipDoubleComplex*>(y), incy,
        c, s);
}

hipblasStatus_t hipblasCrotCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasComplex* x, int64_t incx,
    hipblasComplex* y, int64_t incy,
    const float* c, const hipblasComplex* s)
{
    return hipblasCrot_64(handle, n,
        reinterpret_cast<hipComplex*>(x), incx,
        reinterpret_cast<hipComplex*>(y), incy,
        c, reinterpret_cast<const hipComplex*>(s));
}

hipblasStatus_t hipblasCsrotCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasComplex* x, int64_t incx,
    hipblasComplex* y, int64_t incy,
    const float* c, const float* s)
{
    return hipblasCsrot_64(handle, n,
        reinterpret_cast<hipComplex*>(x), incx,
        reinterpret_cast<hipComplex*>(y), incy,
        c, s);
}

hipblasStatus_t hipblasZrotCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasDoubleComplex* x, int64_t incx,
    hipblasDoubleComplex* y, int64_t incy,
    const double* c, const hipblasDoubleComplex* s)
{
    return hipblasZrot_64(handle, n,
        reinterpret_cast<hipDoubleComplex*>(x), incx,
        reinterpret_cast<hipDoubleComplex*>(y), incy,
        c, reinterpret_cast<const hipDoubleComplex*>(s));
}

hipblasStatus_t hipblasZdrotCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasDoubleComplex* x, int64_t incx,
    hipblasDoubleComplex* y, int64_t incy,
    const double* c, const double* s)
{
    return hipblasZdrot_64(handle, n,
        reinterpret_cast<hipDoubleComplex*>(x), incx,
        reinterpret_cast<hipDoubleComplex*>(y), incy,
        c, s);
}

// rot_batched
hipblasStatus_t hipblasCrotBatchedCast(
    hipblasHandle_t handle, int n,
    hipblasComplex* const x[], int incx,
    hipblasComplex* const y[], int incy,
    const float* c, const hipblasComplex* s,
    int batch_count)
{
    return hipblasCrotBatched(handle, n,
        reinterpret_cast<hipComplex* const*>(x), incx,
        reinterpret_cast<hipComplex* const*>(y), incy,
        c, reinterpret_cast<const hipComplex*>(s),
        batch_count);
}

hipblasStatus_t hipblasCsrotBatchedCast(
    hipblasHandle_t handle, int n,
    hipblasComplex* const x[], int incx,
    hipblasComplex* const y[], int incy,
    const float* c, const float* s,
    int batch_count)
{
    return hipblasCsrotBatched(handle, n,
        reinterpret_cast<hipComplex* const*>(x), incx,
        reinterpret_cast<hipComplex* const*>(y), incy,
        c, s,
        batch_count);
}

hipblasStatus_t hipblasZrotBatchedCast(
    hipblasHandle_t handle, int n,
    hipblasDoubleComplex* const x[], int incx,
    hipblasDoubleComplex* const y[], int incy,
    const double* c, const hipblasDoubleComplex* s,
    int batch_count)
{
    return hipblasZrotBatched(handle, n,
        reinterpret_cast<hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<hipDoubleComplex* const*>(y), incy,
        c, reinterpret_cast<const hipDoubleComplex*>(s),
        batch_count);
}

hipblasStatus_t hipblasZdrotBatchedCast(
    hipblasHandle_t handle, int n,
    hipblasDoubleComplex* const x[], int incx,
    hipblasDoubleComplex* const y[], int incy,
    const double* c, const double* s,
    int batch_count)
{
    return hipblasZdrotBatched(handle, n,
        reinterpret_cast<hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<hipDoubleComplex* const*>(y), incy,
        c, s,
        batch_count);
}

hipblasStatus_t hipblasCrotBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasComplex* const x[], int64_t incx,
    hipblasComplex* const y[], int64_t incy,
    const float* c, const hipblasComplex* s,
    int64_t batch_count)
{
    return hipblasCrotBatched_64(handle, n,
        reinterpret_cast<hipComplex* const*>(x), incx,
        reinterpret_cast<hipComplex* const*>(y), incy,
        c, reinterpret_cast<const hipComplex*>(s),
        batch_count);
}

hipblasStatus_t hipblasCsrotBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasComplex* const x[], int64_t incx,
    hipblasComplex* const y[], int64_t incy,
    const float* c, const float* s,
    int64_t batch_count)
{
    return hipblasCsrotBatched_64(handle, n,
        reinterpret_cast<hipComplex* const*>(x), incx,
        reinterpret_cast<hipComplex* const*>(y), incy,
        c, s,
        batch_count);
}

hipblasStatus_t hipblasZrotBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasDoubleComplex* const x[], int64_t incx,
    hipblasDoubleComplex* const y[], int64_t incy,
    const double* c, const hipblasDoubleComplex* s,
    int64_t batch_count)
{
    return hipblasZrotBatched_64(handle, n,
        reinterpret_cast<hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<hipDoubleComplex* const*>(y), incy,
        c, reinterpret_cast<const hipDoubleComplex*>(s),
        batch_count);
}

hipblasStatus_t hipblasZdrotBatchedCast_64(
    hipblasHandle_t handle, int64_t n,
    hipblasDoubleComplex* const x[], int64_t incx,
    hipblasDoubleComplex* const y[], int64_t incy,
    const double* c, const double* s,
    int64_t batch_count)
{
    return hipblasZdrotBatched_64(handle, n,
        reinterpret_cast<hipDoubleComplex* const*>(x), incx,
        reinterpret_cast<hipDoubleComplex* const*>(y), incy,
        c, s,
        batch_count);
}

// rot_strided_batched hipblasStatus_t hipblasCrotStridedBatchedCast(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, const float* c, const hipblasComplex* s, int batch_count) { return hipblasCrotStridedBatched(handle, n, (hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, c, (const hipComplex*)s, batch_count); } hipblasStatus_t hipblasCsrotStridedBatchedCast(hipblasHandle_t handle, int n, hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* y, int incy, hipblasStride stridey, const float* c, const float* s, int batch_count) { return hipblasCsrotStridedBatched( handle, n, (hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, c, s, batch_count); } hipblasStatus_t hipblasZrotStridedBatchedCast(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, const double* c, const hipblasDoubleComplex* s, int batch_count) { return hipblasZrotStridedBatched(handle, n, (hipDoubleComplex*)x, incx, stridex, (hipDoubleComplex*)y, incy, stridey, c, (const hipDoubleComplex*)s, batch_count); } hipblasStatus_t hipblasZdrotStridedBatchedCast(hipblasHandle_t handle, int n, hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* y, int incy, hipblasStride stridey, const double* c, const double* s, int batch_count) { return hipblasZdrotStridedBatched(handle, n, (hipDoubleComplex*)x, incx, stridex, (hipDoubleComplex*)y, incy, stridey, c, s, batch_count); } hipblasStatus_t hipblasCrotStridedBatchedCast_64(hipblasHandle_t handle, int64_t n, hipblasComplex* x, int64_t incx, hipblasStride stridex, hipblasComplex* y, int64_t incy, hipblasStride stridey, const float* c, const hipblasComplex* s, int64_t batch_count) { return hipblasCrotStridedBatched_64(handle, n, (hipComplex*)x, incx, stridex, (hipComplex*)y, incy, stridey, c, (const hipComplex*)s, batch_count); } 
// rot_strided_batched, 64-bit sizes (remaining variants): complex pointers are cast to the HIP
// complex pointer types; strides, increments and real-valued arguments are forwarded as given.
hipblasStatus_t hipblasCsrotStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, hipblasComplex* x, int64_t incx, hipblasStride stridex,
    hipblasComplex* y, int64_t incy, hipblasStride stridey, const float* c, const float* s,
    int64_t batch_count)
{
    auto* x_hip = (hipComplex*)x;
    auto* y_hip = (hipComplex*)y;
    return hipblasCsrotStridedBatched_64(
        handle, n, x_hip, incx, stridex, y_hip, incy, stridey, c, s, batch_count);
}

hipblasStatus_t hipblasZrotStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, hipblasDoubleComplex* x, int64_t incx,
    hipblasStride stridex, hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey,
    const double* c, const hipblasDoubleComplex* s, int64_t batch_count)
{
    auto* x_hip = (hipDoubleComplex*)x;
    auto* y_hip = (hipDoubleComplex*)y;
    auto* s_hip = (const hipDoubleComplex*)s;
    return hipblasZrotStridedBatched_64(
        handle, n, x_hip, incx, stridex, y_hip, incy, stridey, c, s_hip, batch_count);
}

hipblasStatus_t hipblasZdrotStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, hipblasDoubleComplex* x, int64_t incx,
    hipblasStride stridex, hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey,
    const double* c, const double* s, int64_t batch_count)
{
    auto* x_hip = (hipDoubleComplex*)x;
    auto* y_hip = (hipDoubleComplex*)y;
    return hipblasZdrotStridedBatched_64(
        handle, n, x_hip, incx, stridex, y_hip, incy, stridey, c, s, batch_count);
}

// rotg
// a, b and s are cast to the HIP complex pointer type; the real-valued c is forwarded as given.
hipblasStatus_t hipblasCrotgCast(
    hipblasHandle_t handle, hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s)
{
    auto* a_hip = (hipComplex*)a;
    auto* b_hip = (hipComplex*)b;
    auto* s_hip = (hipComplex*)s;
    return hipblasCrotg(handle, a_hip, b_hip, c, s_hip);
}

hipblasStatus_t hipblasZrotgCast(
    hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c,
    hipblasDoubleComplex* s)
{
    auto* a_hip = (hipDoubleComplex*)a;
    auto* b_hip = (hipDoubleComplex*)b;
    auto* s_hip = (hipDoubleComplex*)s;
    return hipblasZrotg(handle, a_hip, b_hip, c, s_hip);
}

hipblasStatus_t hipblasCrotgCast_64(
    hipblasHandle_t handle, hipblasComplex* a, hipblasComplex* b, float* c, hipblasComplex* s)
{
    auto* a_hip = (hipComplex*)a;
    auto* b_hip = (hipComplex*)b;
    auto* s_hip = (hipComplex*)s;
    return hipblasCrotg_64(handle, a_hip, b_hip, c, s_hip);
}

hipblasStatus_t hipblasZrotgCast_64(
    hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasDoubleComplex* b, double* c,
    hipblasDoubleComplex* s)
{
    auto* a_hip = (hipDoubleComplex*)a;
    auto* b_hip = (hipDoubleComplex*)b;
    auto* s_hip = (hipDoubleComplex*)s;
    return hipblasZrotg_64(handle, a_hip, b_hip, c, s_hip);
}

// rotg_batched
// The a, b and s pointer arrays are cast to arrays of HIP complex pointers; c is passed through.
hipblasStatus_t hipblasCrotgBatchedCast(
    hipblasHandle_t handle, hipblasComplex* const a[], hipblasComplex* const b[],
    float* const c[], hipblasComplex* const s[], int batch_count)
{
    auto* a_hip = (hipComplex* const*)a;
    auto* b_hip = (hipComplex* const*)b;
    auto* s_hip = (hipComplex* const*)s;
    return hipblasCrotgBatched(handle, a_hip, b_hip, c, s_hip, batch_count);
}

hipblasStatus_t hipblasZrotgBatchedCast(
    hipblasHandle_t handle, hipblasDoubleComplex* const a[], hipblasDoubleComplex* const b[],
    double* const c[], hipblasDoubleComplex* const s[], int batch_count)
{
    auto* a_hip = (hipDoubleComplex* const*)a;
    auto* b_hip = (hipDoubleComplex* const*)b;
    auto* s_hip = (hipDoubleComplex* const*)s;
    return hipblasZrotgBatched(handle, a_hip, b_hip, c, s_hip, batch_count);
}

hipblasStatus_t hipblasCrotgBatchedCast_64(
    hipblasHandle_t handle, hipblasComplex* const a[], hipblasComplex* const b[],
    float* const c[], hipblasComplex* const s[], int64_t batch_count)
{
    auto* a_hip = (hipComplex* const*)a;
    auto* b_hip = (hipComplex* const*)b;
    auto* s_hip = (hipComplex* const*)s;
    return hipblasCrotgBatched_64(handle, a_hip, b_hip, c, s_hip, batch_count);
}

hipblasStatus_t hipblasZrotgBatchedCast_64(
    hipblasHandle_t handle, hipblasDoubleComplex* const a[], hipblasDoubleComplex* const b[],
    double* const c[], hipblasDoubleComplex* const s[], int64_t batch_count)
{
    auto* a_hip = (hipDoubleComplex* const*)a;
    auto* b_hip = (hipDoubleComplex* const*)b;
    auto* s_hip = (hipDoubleComplex* const*)s;
    return hipblasZrotgBatched_64(handle, a_hip, b_hip, c, s_hip, batch_count);
}

// rotg_strided_batched
// a, b and s are cast to the HIP complex pointer type; c and all four strides pass through.
hipblasStatus_t hipblasCrotgStridedBatchedCast(
    hipblasHandle_t handle, hipblasComplex* a, hipblasStride stridea, hipblasComplex* b,
    hipblasStride strideb, float* c, hipblasStride stridec, hipblasComplex* s,
    hipblasStride strides, int batch_count)
{
    auto* a_hip = (hipComplex*)a;
    auto* b_hip = (hipComplex*)b;
    auto* s_hip = (hipComplex*)s;
    return hipblasCrotgStridedBatched(
        handle, a_hip, stridea, b_hip, strideb, c, stridec, s_hip, strides, batch_count);
}

hipblasStatus_t hipblasZrotgStridedBatchedCast(
    hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasStride stridea,
    hipblasDoubleComplex* b, hipblasStride strideb, double* c, hipblasStride stridec,
    hipblasDoubleComplex* s, hipblasStride strides, int batch_count)
{
    auto* a_hip = (hipDoubleComplex*)a;
    auto* b_hip = (hipDoubleComplex*)b;
    auto* s_hip = (hipDoubleComplex*)s;
    return hipblasZrotgStridedBatched(
        handle, a_hip, stridea, b_hip, strideb, c, stridec, s_hip, strides, batch_count);
}

hipblasStatus_t hipblasCrotgStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasComplex* a, hipblasStride stridea, hipblasComplex* b,
    hipblasStride strideb, float* c, hipblasStride stridec, hipblasComplex* s,
    hipblasStride strides, int64_t batch_count)
{
    auto* a_hip = (hipComplex*)a;
    auto* b_hip = (hipComplex*)b;
    auto* s_hip = (hipComplex*)s;
    return hipblasCrotgStridedBatched_64(
        handle, a_hip, stridea, b_hip, strideb, c, stridec, s_hip, strides, batch_count);
}

hipblasStatus_t hipblasZrotgStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasDoubleComplex* a, hipblasStride stridea,
    hipblasDoubleComplex* b, hipblasStride strideb, double* c, hipblasStride stridec,
    hipblasDoubleComplex* s, hipblasStride strides, int64_t batch_count)
{
    auto* a_hip = (hipDoubleComplex*)a;
    auto* b_hip = (hipDoubleComplex*)b;
    auto* s_hip = (hipDoubleComplex*)s;
    return hipblasZrotgStridedBatched_64(
        handle, a_hip, stridea, b_hip, strideb, c, stridec, s_hip, strides, batch_count);
}

// rotm, rotmg - no complex versions

// amax
// The const input vector x is cast to the HIP complex pointer type; result is forwarded as given.
hipblasStatus_t hipblasIcamaxCast(
    hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, int* result)
{
    auto* x_hip = (const hipComplex*)x;
    return hipblasIcamax(handle, n, x_hip, incx, result);
}

hipblasStatus_t hipblasIzamaxCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, int* result)
{
    auto* x_hip = (const hipDoubleComplex*)x;
    return hipblasIzamax(handle, n, x_hip, incx, result);
}

hipblasStatus_t hipblasIcamaxCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* x, int64_t incx, int64_t* result)
{
    auto* x_hip = (const hipComplex*)x;
    return hipblasIcamax_64(handle, n, x_hip, incx, result);
}

hipblasStatus_t hipblasIzamaxCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* x, int64_t incx,
    int64_t* result)
{
    auto* x_hip = (const hipDoubleComplex*)x;
    return hipblasIzamax_64(handle, n, x_hip, incx, result);
}

// amax_batched
hipblasStatus_t hipblasIcamaxBatchedCast(
    hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count,
    int* result)
{
    auto* x_hip = (const hipComplex* const*)x;
    return hipblasIcamaxBatched(handle, n, x_hip, incx, batch_count, result);
}

hipblasStatus_t hipblasIzamaxBatchedCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx,
    int batch_count, int* result)
{
    auto* x_hip = (const hipDoubleComplex* const*)x;
    return hipblasIzamaxBatched(handle, n, x_hip, incx, batch_count, result);
}

hipblasStatus_t hipblasIcamaxBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* const x[], int64_t incx,
    int64_t batch_count, int64_t* result)
{
    auto* x_hip = (const hipComplex* const*)x;
    return hipblasIcamaxBatched_64(handle, n, x_hip, incx, batch_count, result);
}

hipblasStatus_t hipblasIzamaxBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* const x[], int64_t incx,
    int64_t batch_count, int64_t* result)
{
    auto* x_hip = (const hipDoubleComplex* const*)x;
    return hipblasIzamaxBatched_64(handle, n, x_hip, incx, batch_count, result);
}

// amax_strided_batched
hipblasStatus_t hipblasIcamaxStridedBatchedCast(
    hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex,
    int batch_count, int* result)
{
    auto* x_hip = (const hipComplex*)x;
    return hipblasIcamaxStridedBatched(handle, n, x_hip, incx, stridex, batch_count, result);
}

hipblasStatus_t hipblasIzamaxStridedBatchedCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx,
    hipblasStride stridex, int batch_count, int* result)
{
    auto* x_hip = (const hipDoubleComplex*)x;
    return hipblasIzamaxStridedBatched(handle, n, x_hip, incx, stridex, batch_count, result);
}

hipblasStatus_t hipblasIcamaxStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* x, int64_t incx,
    hipblasStride stridex, int64_t batch_count, int64_t* result)
{
    auto* x_hip = (const hipComplex*)x;
    return hipblasIcamaxStridedBatched_64(handle, n, x_hip, incx, stridex, batch_count, result);
}

hipblasStatus_t hipblasIzamaxStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* x, int64_t incx,
    hipblasStride stridex, int64_t batch_count, int64_t* result)
{
    auto* x_hip = (const hipDoubleComplex*)x;
    return hipblasIzamaxStridedBatched_64(handle, n, x_hip, incx, stridex, batch_count, result);
}

// amin
// Same shape as the amax wrappers: only x is cast, everything else is forwarded as given.
hipblasStatus_t hipblasIcaminCast(
    hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, int* result)
{
    auto* x_hip = (const hipComplex*)x;
    return hipblasIcamin(handle, n, x_hip, incx, result);
}

hipblasStatus_t hipblasIzaminCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx, int* result)
{
    auto* x_hip = (const hipDoubleComplex*)x;
    return hipblasIzamin(handle, n, x_hip, incx, result);
}

hipblasStatus_t hipblasIcaminCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* x, int64_t incx, int64_t* result)
{
    auto* x_hip = (const hipComplex*)x;
    return hipblasIcamin_64(handle, n, x_hip, incx, result);
}

hipblasStatus_t hipblasIzaminCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* x, int64_t incx,
    int64_t* result)
{
    auto* x_hip = (const hipDoubleComplex*)x;
    return hipblasIzamin_64(handle, n, x_hip, incx, result);
}

// amin_batched
hipblasStatus_t hipblasIcaminBatchedCast(
    hipblasHandle_t handle, int n, const hipblasComplex* const x[], int incx, int batch_count,
    int* result)
{
    auto* x_hip = (const hipComplex* const*)x;
    return hipblasIcaminBatched(handle, n, x_hip, incx, batch_count, result);
}

hipblasStatus_t hipblasIzaminBatchedCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* const x[], int incx,
    int batch_count, int* result)
{
    auto* x_hip = (const hipDoubleComplex* const*)x;
    return hipblasIzaminBatched(handle, n, x_hip, incx, batch_count, result);
}

hipblasStatus_t hipblasIcaminBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* const x[], int64_t incx,
    int64_t batch_count, int64_t* result)
{
    auto* x_hip = (const hipComplex* const*)x;
    return hipblasIcaminBatched_64(handle, n, x_hip, incx, batch_count, result);
}
// amin_batched, 64-bit sizes, double-complex variant: only the x pointer array is cast.
hipblasStatus_t hipblasIzaminBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* const x[], int64_t incx,
    int64_t batch_count, int64_t* result)
{
    auto* x_hip = (const hipDoubleComplex* const*)x;
    return hipblasIzaminBatched_64(handle, n, x_hip, incx, batch_count, result);
}

// amin_strided_batched
hipblasStatus_t hipblasIcaminStridedBatchedCast(
    hipblasHandle_t handle, int n, const hipblasComplex* x, int incx, hipblasStride stridex,
    int batch_count, int* result)
{
    auto* x_hip = (const hipComplex*)x;
    return hipblasIcaminStridedBatched(handle, n, x_hip, incx, stridex, batch_count, result);
}

hipblasStatus_t hipblasIzaminStridedBatchedCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* x, int incx,
    hipblasStride stridex, int batch_count, int* result)
{
    auto* x_hip = (const hipDoubleComplex*)x;
    return hipblasIzaminStridedBatched(handle, n, x_hip, incx, stridex, batch_count, result);
}

hipblasStatus_t hipblasIcaminStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* x, int64_t incx,
    hipblasStride stridex, int64_t batch_count, int64_t* result)
{
    auto* x_hip = (const hipComplex*)x;
    return hipblasIcaminStridedBatched_64(handle, n, x_hip, incx, stridex, batch_count, result);
}

hipblasStatus_t hipblasIzaminStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* x, int64_t incx,
    hipblasStride stridex, int64_t batch_count, int64_t* result)
{
    auto* x_hip = (const hipDoubleComplex*)x;
    return hipblasIzaminStridedBatched_64(handle, n, x_hip, incx, stridex, batch_count, result);
}

// scal
// Complex alpha and x are cast to the HIP complex pointer types. In the Csscal/Zdscal variants
// alpha is a real pointer and is forwarded without a cast.
hipblasStatus_t hipblasCscalCast(
    hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* x_hip     = (hipComplex*)x;
    return hipblasCscal(handle, n, alpha_hip, x_hip, incx);
}

hipblasStatus_t hipblasCsscalCast(
    hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx)
{
    auto* x_hip = (hipComplex*)x;
    return hipblasCsscal(handle, n, alpha, x_hip, incx);
}

hipblasStatus_t hipblasZscalCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x,
    int incx)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* x_hip     = (hipDoubleComplex*)x;
    return hipblasZscal(handle, n, alpha_hip, x_hip, incx);
}

hipblasStatus_t hipblasZdscalCast(
    hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx)
{
    auto* x_hip = (hipDoubleComplex*)x;
    return hipblasZdscal(handle, n, alpha, x_hip, incx);
}

// scal_64
hipblasStatus_t hipblasCscalCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* alpha, hipblasComplex* x,
    int64_t incx)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* x_hip     = (hipComplex*)x;
    return hipblasCscal_64(handle, n, alpha_hip, x_hip, incx);
}

hipblasStatus_t hipblasCsscalCast_64(
    hipblasHandle_t handle, int64_t n, const float* alpha, hipblasComplex* x, int64_t incx)
{
    auto* x_hip = (hipComplex*)x;
    return hipblasCsscal_64(handle, n, alpha, x_hip, incx);
}

hipblasStatus_t hipblasZscalCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* alpha,
    hipblasDoubleComplex* x, int64_t incx)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* x_hip     = (hipDoubleComplex*)x;
    return hipblasZscal_64(handle, n, alpha_hip, x_hip, incx);
}

hipblasStatus_t hipblasZdscalCast_64(
    hipblasHandle_t handle, int64_t n, const double* alpha, hipblasDoubleComplex* x,
    int64_t incx)
{
    auto* x_hip = (hipDoubleComplex*)x;
    return hipblasZdscal_64(handle, n, alpha, x_hip, incx);
}

// scal batched
hipblasStatus_t hipblasCscalBatchedCast(
    hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* const x[],
    int incx, int batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* x_hip     = (hipComplex* const*)x;
    return hipblasCscalBatched(handle, n, alpha_hip, x_hip, incx, batch_count);
}

hipblasStatus_t hipblasCsscalBatchedCast(
    hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* const x[], int incx,
    int batch_count)
{
    auto* x_hip = (hipComplex* const*)x;
    return hipblasCsscalBatched(handle, n, alpha, x_hip, incx, batch_count);
}

hipblasStatus_t hipblasZscalBatchedCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha,
    hipblasDoubleComplex* const x[], int incx, int batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* x_hip     = (hipDoubleComplex* const*)x;
    return hipblasZscalBatched(handle, n, alpha_hip, x_hip, incx, batch_count);
}
// scal batched, real-alpha double-complex variant: only the x pointer array is cast.
hipblasStatus_t hipblasZdscalBatchedCast(
    hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* const x[],
    int incx, int batch_count)
{
    auto* x_hip = (hipDoubleComplex* const*)x;
    return hipblasZdscalBatched(handle, n, alpha, x_hip, incx, batch_count);
}

// scal batched_64
hipblasStatus_t hipblasCscalBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* alpha, hipblasComplex* const x[],
    int64_t incx, int64_t batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* x_hip     = (hipComplex* const*)x;
    return hipblasCscalBatched_64(handle, n, alpha_hip, x_hip, incx, batch_count);
}

hipblasStatus_t hipblasCsscalBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const float* alpha, hipblasComplex* const x[],
    int64_t incx, int64_t batch_count)
{
    auto* x_hip = (hipComplex* const*)x;
    return hipblasCsscalBatched_64(handle, n, alpha, x_hip, incx, batch_count);
}

hipblasStatus_t hipblasZscalBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* alpha,
    hipblasDoubleComplex* const x[], int64_t incx, int64_t batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* x_hip     = (hipDoubleComplex* const*)x;
    return hipblasZscalBatched_64(handle, n, alpha_hip, x_hip, incx, batch_count);
}

hipblasStatus_t hipblasZdscalBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const double* alpha, hipblasDoubleComplex* const x[],
    int64_t incx, int64_t batch_count)
{
    auto* x_hip = (hipDoubleComplex* const*)x;
    return hipblasZdscalBatched_64(handle, n, alpha, x_hip, incx, batch_count);
}

// scal strided_batched
hipblasStatus_t hipblasCscalStridedBatchedCast(
    hipblasHandle_t handle, int n, const hipblasComplex* alpha, hipblasComplex* x, int incx,
    hipblasStride stridex, int batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* x_hip     = (hipComplex*)x;
    return hipblasCscalStridedBatched(handle, n, alpha_hip, x_hip, incx, stridex, batch_count);
}

hipblasStatus_t hipblasCsscalStridedBatchedCast(
    hipblasHandle_t handle, int n, const float* alpha, hipblasComplex* x, int incx,
    hipblasStride stridex, int batch_count)
{
    auto* x_hip = (hipComplex*)x;
    return hipblasCsscalStridedBatched(handle, n, alpha, x_hip, incx, stridex, batch_count);
}

hipblasStatus_t hipblasZscalStridedBatchedCast(
    hipblasHandle_t handle, int n, const hipblasDoubleComplex* alpha, hipblasDoubleComplex* x,
    int incx, hipblasStride stridex, int batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* x_hip     = (hipDoubleComplex*)x;
    return hipblasZscalStridedBatched(handle, n, alpha_hip, x_hip, incx, stridex, batch_count);
}

hipblasStatus_t hipblasZdscalStridedBatchedCast(
    hipblasHandle_t handle, int n, const double* alpha, hipblasDoubleComplex* x, int incx,
    hipblasStride stridex, int batch_count)
{
    auto* x_hip = (hipDoubleComplex*)x;
    return hipblasZdscalStridedBatched(handle, n, alpha, x_hip, incx, stridex, batch_count);
}

// scal strided_batched_64
hipblasStatus_t hipblasCscalStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasComplex* alpha, hipblasComplex* x,
    int64_t incx, hipblasStride stridex, int64_t batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* x_hip     = (hipComplex*)x;
    return hipblasCscalStridedBatched_64(
        handle, n, alpha_hip, x_hip, incx, stridex, batch_count);
}

hipblasStatus_t hipblasCsscalStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const float* alpha, hipblasComplex* x, int64_t incx,
    hipblasStride stridex, int64_t batch_count)
{
    auto* x_hip = (hipComplex*)x;
    return hipblasCsscalStridedBatched_64(handle, n, alpha, x_hip, incx, stridex, batch_count);
}

hipblasStatus_t hipblasZscalStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const hipblasDoubleComplex* alpha,
    hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, int64_t batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* x_hip     = (hipDoubleComplex*)x;
    return hipblasZscalStridedBatched_64(
        handle, n, alpha_hip, x_hip, incx, stridex, batch_count);
}

hipblasStatus_t hipblasZdscalStridedBatchedCast_64(
    hipblasHandle_t handle, int64_t n, const double* alpha, hipblasDoubleComplex* x,
    int64_t incx, hipblasStride stridex, int64_t batch_count)
{
    auto* x_hip = (hipDoubleComplex*)x;
    return hipblasZdscalStridedBatched_64(handle, n, alpha, x_hip, incx, stridex, batch_count);
}

/*
 * ===========================================================================
 *    level 2 BLAS
 * ===========================================================================
 */

// gbmv
// alpha, A, x, beta and y are cast to the HIP complex pointer types (const-ness preserved);
// transA, the integer dimensions, leading dimension and increments are forwarded as given.
hipblasStatus_t hipblasCgbmvCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku,
    const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x,
    int incx, const hipblasComplex* beta, hipblasComplex* y, int incy)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex*)A;
    auto* x_hip     = (const hipComplex*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex*)y;
    return hipblasCgbmv(
        handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda, x_hip, incx, beta_hip, y_hip, incy);
}

hipblasStatus_t hipblasZgbmvCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda,
    const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* y, int incy)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex*)A;
    auto* x_hip     = (const hipDoubleComplex*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex*)y;
    return hipblasZgbmv(
        handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda, x_hip, incx, beta_hip, y_hip, incy);
}

// gbmv_64
hipblasStatus_t hipblasCgbmvCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, int64_t kl,
    int64_t ku, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda,
    const hipblasComplex* x, int64_t incx, const hipblasComplex* beta, hipblasComplex* y,
    int64_t incy)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex*)A;
    auto* x_hip     = (const hipComplex*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex*)y;
    return hipblasCgbmv_64(
        handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda, x_hip, incx, beta_hip, y_hip, incy);
}

hipblasStatus_t hipblasZgbmvCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, int64_t kl,
    int64_t ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda,
    const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* y, int64_t incy)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex*)A;
    auto* x_hip     = (const hipDoubleComplex*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex*)y;
    return hipblasZgbmv_64(
        handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda, x_hip, incx, beta_hip, y_hip, incy);
}

// gbmv_batched
hipblasStatus_t hipblasCgbmvBatchedCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku,
    const hipblasComplex* alpha, const hipblasComplex* const A[], int lda,
    const hipblasComplex* const x[], int incx, const hipblasComplex* beta,
    hipblasComplex* const y[], int incy, int batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex* const*)A;
    auto* x_hip     = (const hipComplex* const*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex* const*)y;
    return hipblasCgbmvBatched(handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda, x_hip, incx,
                               beta_hip, y_hip, incy, batch_count);
}

hipblasStatus_t hipblasZgbmvBatchedCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda,
    const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* const y[], int incy, int batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex* const*)A;
    auto* x_hip     = (const hipDoubleComplex* const*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex* const*)y;
    return hipblasZgbmvBatched(handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda, x_hip, incx,
                               beta_hip, y_hip, incy, batch_count);
}

// gbmv_batched_64
hipblasStatus_t hipblasCgbmvBatchedCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, int64_t kl,
    int64_t ku, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda,
    const hipblasComplex* const x[], int64_t incx, const hipblasComplex* beta,
    hipblasComplex* const y[], int64_t incy, int64_t batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex* const*)A;
    auto* x_hip     = (const hipComplex* const*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex* const*)y;
    return hipblasCgbmvBatched_64(handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda, x_hip,
                                  incx, beta_hip, y_hip, incy, batch_count);
}

hipblasStatus_t hipblasZgbmvBatchedCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, int64_t kl,
    int64_t ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[],
    int64_t lda, const hipblasDoubleComplex* const x[], int64_t incx,
    const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int64_t incy,
    int64_t batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex* const*)A;
    auto* x_hip     = (const hipDoubleComplex* const*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex* const*)y;
    return hipblasZgbmvBatched_64(handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda, x_hip,
                                  incx, beta_hip, y_hip, incy, batch_count);
}

// gbmv_strided_batched
hipblasStatus_t hipblasCgbmvStridedBatchedCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku,
    const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride stride_a,
    const hipblasComplex* x, int incx, hipblasStride stride_x, const hipblasComplex* beta,
    hipblasComplex* y, int incy, hipblasStride stride_y, int batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex*)A;
    auto* x_hip     = (const hipComplex*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex*)y;
    return hipblasCgbmvStridedBatched(handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda,
                                      stride_a, x_hip, incx, stride_x, beta_hip, y_hip, incy,
                                      stride_y, batch_count);
}

hipblasStatus_t hipblasZgbmvStridedBatchedCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, int kl, int ku,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda,
    hipblasStride stride_a, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x,
    const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stride_y,
    int batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex*)A;
    auto* x_hip     = (const hipDoubleComplex*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex*)y;
    return hipblasZgbmvStridedBatched(handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda,
                                      stride_a, x_hip, incx, stride_x, beta_hip, y_hip, incy,
                                      stride_y, batch_count);
}

// gbmv_strided_batched_64
hipblasStatus_t hipblasCgbmvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, int64_t kl,
    int64_t ku, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda,
    hipblasStride stride_a, const hipblasComplex* x, int64_t incx, hipblasStride stride_x,
    const hipblasComplex* beta, hipblasComplex* y, int64_t incy, hipblasStride stride_y,
    int64_t batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex*)A;
    auto* x_hip     = (const hipComplex*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex*)y;
    return hipblasCgbmvStridedBatched_64(handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda,
                                         stride_a, x_hip, incx, stride_x, beta_hip, y_hip, incy,
                                         stride_y, batch_count);
}

hipblasStatus_t hipblasZgbmvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n, int64_t kl,
    int64_t ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda,
    hipblasStride stride_a, const hipblasDoubleComplex* x, int64_t incx, hipblasStride stride_x,
    const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy,
    hipblasStride stride_y, int64_t batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex*)A;
    auto* x_hip     = (const hipDoubleComplex*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex*)y;
    return hipblasZgbmvStridedBatched_64(handle, transA, m, n, kl, ku, alpha_hip, A_hip, lda,
                                         stride_a, x_hip, incx, stride_x, beta_hip, y_hip, incy,
                                         stride_y, batch_count);
}

// gemv
// Same casting pattern as gbmv, without the kl/ku arguments.
hipblasStatus_t hipblasCgemvCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n,
    const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x,
    int incx, const hipblasComplex* beta, hipblasComplex* y, int incy)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex*)A;
    auto* x_hip     = (const hipComplex*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex*)y;
    return hipblasCgemv(
        handle, transA, m, n, alpha_hip, A_hip, lda, x_hip, incx, beta_hip, y_hip, incy);
}

hipblasStatus_t hipblasZgemvCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda,
    const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* y, int incy)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex*)A;
    auto* x_hip     = (const hipDoubleComplex*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex*)y;
    return hipblasZgemv(
        handle, transA, m, n, alpha_hip, A_hip, lda, x_hip, incx, beta_hip, y_hip, incy);
}

// gemv_64
hipblasStatus_t hipblasCgemvCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n,
    const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* x,
    int64_t incx, const hipblasComplex* beta, hipblasComplex* y, int64_t incy)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex*)A;
    auto* x_hip     = (const hipComplex*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex*)y;
    return hipblasCgemv_64(
        handle, transA, m, n, alpha_hip, A_hip, lda, x_hip, incx, beta_hip, y_hip, incy);
}

hipblasStatus_t hipblasZgemvCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda,
    const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* y, int64_t incy)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex*)A;
    auto* x_hip     = (const hipDoubleComplex*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex*)y;
    return hipblasZgemv_64(
        handle, transA, m, n, alpha_hip, A_hip, lda, x_hip, incx, beta_hip, y_hip, incy);
}

// gemv_batched
hipblasStatus_t hipblasCgemvBatchedCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n,
    const hipblasComplex* alpha, const hipblasComplex* const A[], int lda,
    const hipblasComplex* const x[], int incx, const hipblasComplex* beta,
    hipblasComplex* const y[], int incy, int batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex* const*)A;
    auto* x_hip     = (const hipComplex* const*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex* const*)y;
    return hipblasCgemvBatched(handle, transA, m, n, alpha_hip, A_hip, lda, x_hip, incx,
                               beta_hip, y_hip, incy, batch_count);
}

hipblasStatus_t hipblasZgemvBatchedCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda,
    const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* const y[], int incy, int batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex* const*)A;
    auto* x_hip     = (const hipDoubleComplex* const*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex* const*)y;
    return hipblasZgemvBatched(handle, transA, m, n, alpha_hip, A_hip, lda, x_hip, incx,
                               beta_hip, y_hip, incy, batch_count);
}

// gemv_batched_64
hipblasStatus_t hipblasCgemvBatchedCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n,
    const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda,
    const hipblasComplex* const x[], int64_t incx, const hipblasComplex* beta,
    hipblasComplex* const y[], int64_t incy, int64_t batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex* const*)A;
    auto* x_hip     = (const hipComplex* const*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex* const*)y;
    return hipblasCgemvBatched_64(handle, transA, m, n, alpha_hip, A_hip, lda, x_hip, incx,
                                  beta_hip, y_hip, incy, batch_count);
}

hipblasStatus_t hipblasZgemvBatchedCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda,
    const hipblasDoubleComplex* const x[], int64_t incx, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* const y[], int64_t incy, int64_t batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex* const*)A;
    auto* x_hip     = (const hipDoubleComplex* const*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex* const*)y;
    return hipblasZgemvBatched_64(handle, transA, m, n, alpha_hip, A_hip, lda, x_hip, incx,
                                  beta_hip, y_hip, incy, batch_count);
}

// gemv_strided_batched
hipblasStatus_t hipblasCgemvStridedBatchedCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n,
    const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA,
    const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta,
    hipblasComplex* y, int incy, hipblasStride stridey, int batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex*)A;
    auto* x_hip     = (const hipComplex*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex*)y;
    return hipblasCgemvStridedBatched(handle, transA, m, n, alpha_hip, A_hip, lda, strideA,
                                      x_hip, incx, stridex, beta_hip, y_hip, incy, stridey,
                                      batch_count);
}

hipblasStatus_t hipblasZgemvStridedBatchedCast(
    hipblasHandle_t handle, hipblasOperation_t transA, int m, int n,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda,
    hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey,
    int batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex*)A;
    auto* x_hip     = (const hipDoubleComplex*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex*)y;
    return hipblasZgemvStridedBatched(handle, transA, m, n, alpha_hip, A_hip, lda, strideA,
                                      x_hip, incx, stridex, beta_hip, y_hip, incy, stridey,
                                      batch_count);
}

// gemv_strided_batched_64
hipblasStatus_t hipblasCgemvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n,
    const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA,
    const hipblasComplex* x, int64_t incx, hipblasStride stridex, const hipblasComplex* beta,
    hipblasComplex* y, int64_t incy, hipblasStride stridey, int64_t batch_count)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* A_hip     = (const hipComplex*)A;
    auto* x_hip     = (const hipComplex*)x;
    auto* beta_hip  = (const hipComplex*)beta;
    auto* y_hip     = (hipComplex*)y;
    return hipblasCgemvStridedBatched_64(handle, transA, m, n, alpha_hip, A_hip, lda, strideA,
                                         x_hip, incx, stridex, beta_hip, y_hip, incy, stridey,
                                         batch_count);
}

hipblasStatus_t hipblasZgemvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasOperation_t transA, int64_t m, int64_t n,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda,
    hipblasStride strideA, const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy,
    hipblasStride stridey, int64_t batch_count)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* A_hip     = (const hipDoubleComplex*)A;
    auto* x_hip     = (const hipDoubleComplex*)x;
    auto* beta_hip  = (const hipDoubleComplex*)beta;
    auto* y_hip     = (hipDoubleComplex*)y;
    return hipblasZgemvStridedBatched_64(handle, transA, m, n, alpha_hip, A_hip, lda, strideA,
                                         x_hip, incx, stridex, beta_hip, y_hip, incy, stridey,
                                         batch_count);
}

// ger
// alpha, x and y are cast as const HIP complex pointers; A is cast as a mutable one.
hipblasStatus_t hipblasCgeruCast(
    hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x,
    int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* x_hip     = (const hipComplex*)x;
    auto* y_hip     = (const hipComplex*)y;
    auto* A_hip     = (hipComplex*)A;
    return hipblasCgeru(handle, m, n, alpha_hip, x_hip, incx, y_hip, incy, A_hip, lda);
}

hipblasStatus_t hipblasCgercCast(
    hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x,
    int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* x_hip     = (const hipComplex*)x;
    auto* y_hip     = (const hipComplex*)y;
    auto* A_hip     = (hipComplex*)A;
    return hipblasCgerc(handle, m, n, alpha_hip, x_hip, incx, y_hip, incy, A_hip, lda);
}

hipblasStatus_t hipblasZgeruCast(
    hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy,
    hipblasDoubleComplex* A, int lda)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* x_hip     = (const hipDoubleComplex*)x;
    auto* y_hip     = (const hipDoubleComplex*)y;
    auto* A_hip     = (hipDoubleComplex*)A;
    return hipblasZgeru(handle, m, n, alpha_hip, x_hip, incx, y_hip, incy, A_hip, lda);
}

hipblasStatus_t hipblasZgercCast(
    hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy,
    hipblasDoubleComplex* A, int lda)
{
    auto* alpha_hip = (const hipDoubleComplex*)alpha;
    auto* x_hip     = (const hipDoubleComplex*)x;
    auto* y_hip     = (const hipDoubleComplex*)y;
    auto* A_hip     = (hipDoubleComplex*)A;
    return hipblasZgerc(handle, m, n, alpha_hip, x_hip, incx, y_hip, incy, A_hip, lda);
}

// ger_64
hipblasStatus_t hipblasCgeruCast_64(
    hipblasHandle_t handle, int64_t m, int64_t n, const hipblasComplex* alpha,
    const hipblasComplex* x, int64_t incx, const hipblasComplex* y, int64_t incy,
    hipblasComplex* A, int64_t lda)
{
    auto* alpha_hip = (const hipComplex*)alpha;
    auto* x_hip     = (const hipComplex*)x;
    auto* y_hip     = (const hipComplex*)y;
    auto* A_hip     = (hipComplex*)A;
    return hipblasCgeru_64(handle, m, n, alpha_hip, x_hip, incx, y_hip, incy, A_hip, lda);
}

hipblasStatus_t hipblasCgercCast_64(hipblasHandle_t handle, int64_t m, int64_t n,
                                    const hipblasComplex* alpha, const hipblasComplex* x,
                                    int64_t incx, const hipblasComplex* y, int64_t
incy, hipblasComplex* A, int64_t lda) { return hipblasCgerc_64(handle, m, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)A, lda); } hipblasStatus_t hipblasZgeruCast_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* A, int64_t lda) { return hipblasZgeru_64(handle, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, (const hipDoubleComplex*)y, incy, (hipDoubleComplex*)A, lda); } hipblasStatus_t hipblasZgercCast_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* A, int64_t lda) { return hipblasZgerc_64(handle, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, (const hipDoubleComplex*)y, incy, (hipDoubleComplex*)A, lda); } // ger_batched hipblasStatus_t hipblasCgeruBatchedCast(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batch_count) { return hipblasCgeruBatched(handle, m, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (const hipComplex* const*)y, incy, (hipComplex* const*)A, lda, batch_count); } hipblasStatus_t hipblasCgercBatchedCast(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batch_count) { return hipblasCgercBatched(handle, m, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (const hipComplex* const*)y, incy, (hipComplex* const*)A, lda, batch_count); } hipblasStatus_t hipblasZgeruBatchedCast(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, 
const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batch_count) { return hipblasZgeruBatched(handle, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex* const*)y, incy, (hipDoubleComplex* const*)A, lda, batch_count); } hipblasStatus_t hipblasZgercBatchedCast(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda, int batch_count) { return hipblasZgercBatched(handle, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex* const*)y, incy, (hipDoubleComplex* const*)A, lda, batch_count); } // ger_batched_64 hipblasStatus_t hipblasCgeruBatchedCast_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* const x[], int64_t incx, const hipblasComplex* const y[], int64_t incy, hipblasComplex* const A[], int64_t lda, int64_t batch_count) { return hipblasCgeruBatched_64(handle, m, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (const hipComplex* const*)y, incy, (hipComplex* const*)A, lda, batch_count); } hipblasStatus_t hipblasCgercBatchedCast_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* const x[], int64_t incx, const hipblasComplex* const y[], int64_t incy, hipblasComplex* const A[], int64_t lda, int64_t batch_count) { return hipblasCgercBatched_64(handle, m, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (const hipComplex* const*)y, incy, (hipComplex* const*)A, lda, batch_count); } hipblasStatus_t hipblasZgeruBatchedCast_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int64_t incx, const hipblasDoubleComplex* const y[], 
int64_t incy, hipblasDoubleComplex* const A[], int64_t lda, int64_t batch_count) { return hipblasZgeruBatched_64(handle, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex* const*)y, incy, (hipDoubleComplex* const*)A, lda, batch_count); } hipblasStatus_t hipblasZgercBatchedCast_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int64_t incx, const hipblasDoubleComplex* const y[], int64_t incy, hipblasDoubleComplex* const A[], int64_t lda, int64_t batch_count) { return hipblasZgercBatched_64(handle, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex* const*)y, incy, (hipDoubleComplex* const*)A, lda, batch_count); } // ger_strided_batched hipblasStatus_t hipblasCgeruStridedBatchedCast(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasCgeruStridedBatched(handle, m, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (const hipComplex*)y, incy, stridey, (hipComplex*)A, lda, strideA, batch_count); } hipblasStatus_t hipblasCgercStridedBatchedCast(hipblasHandle_t handle, int m, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasCgercStridedBatched(handle, m, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (const hipComplex*)y, incy, stridey, (hipComplex*)A, lda, strideA, batch_count); } hipblasStatus_t hipblasZgeruStridedBatchedCast(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const 
hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasZgeruStridedBatched(handle, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)y, incy, stridey, (hipDoubleComplex*)A, lda, strideA, batch_count); } hipblasStatus_t hipblasZgercStridedBatchedCast(hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasZgercStridedBatched(handle, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)y, incy, stridey, (hipDoubleComplex*)A, lda, strideA, batch_count); } // ger_strided_batched_64 hipblasStatus_t hipblasCgeruStridedBatchedCast_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, hipblasStride stridex, const hipblasComplex* y, int64_t incy, hipblasStride stridey, hipblasComplex* A, int64_t lda, hipblasStride strideA, int64_t batch_count) { return hipblasCgeruStridedBatched_64(handle, m, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (const hipComplex*)y, incy, stridey, (hipComplex*)A, lda, strideA, batch_count); } hipblasStatus_t hipblasCgercStridedBatchedCast_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, hipblasStride stridex, const hipblasComplex* y, int64_t incy, hipblasStride stridey, hipblasComplex* A, int64_t lda, hipblasStride strideA, int64_t batch_count) { return hipblasCgercStridedBatched_64(handle, m, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (const hipComplex*)y, incy, stridey, (hipComplex*)A, lda, strideA, batch_count); } hipblasStatus_t 
hipblasZgeruStridedBatchedCast_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, const hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey, hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, int64_t batch_count) { return hipblasZgeruStridedBatched_64(handle, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)y, incy, stridey, (hipDoubleComplex*)A, lda, strideA, batch_count); } hipblasStatus_t hipblasZgercStridedBatchedCast_64(hipblasHandle_t handle, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, const hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey, hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, int64_t batch_count) { return hipblasZgercStridedBatched_64(handle, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)y, incy, stridey, (hipDoubleComplex*)A, lda, strideA, batch_count); } // hbmv hipblasStatus_t hipblasChbmvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasChbmv(handle, uplo, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)x, incx, (const hipComplex*)beta, (hipComplex*)y, incy); } hipblasStatus_t hipblasZhbmvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZhbmv(handle, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)x, incx, (const 
hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy); } // hbmv_64 hipblasStatus_t hipblasChbmvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* x, int64_t incx, const hipblasComplex* beta, hipblasComplex* y, int64_t incy) { return hipblasChbmv_64(handle, uplo, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)x, incx, (const hipComplex*)beta, (hipComplex*)y, incy); } hipblasStatus_t hipblasZhbmvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy) { return hipblasZhbmv_64(handle, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy); } // hbmv_batched hipblasStatus_t hipblasChbmvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batchCount) { return hipblasChbmvBatched(handle, uplo, n, k, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)x, incx, (const hipComplex*)beta, (hipComplex* const*)y, incy, batchCount); } hipblasStatus_t hipblasZhbmvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batchCount) { return hipblasZhbmvBatched(handle, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, 
(const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)y, incy, batchCount); } // hbmv_batched_64 hipblasStatus_t hipblasChbmvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const x[], int64_t incx, const hipblasComplex* beta, hipblasComplex* const y[], int64_t incy, int64_t batchCount) { return hipblasChbmvBatched_64(handle, uplo, n, k, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)x, incx, (const hipComplex*)beta, (hipComplex* const*)y, incy, batchCount); } hipblasStatus_t hipblasZhbmvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, const hipblasDoubleComplex* const x[], int64_t incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int64_t incy, int64_t batchCount) { return hipblasZhbmvBatched_64(handle, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)y, incy, batchCount); } // hbmv_strided_batched hipblasStatus_t hipblasChbmvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasChbmvStridedBatched(handle, uplo, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)x, incx, stridex, (const hipComplex*)beta, (hipComplex*)y, incy, stridey, batchCount); } hipblasStatus_t hipblasZhbmvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const 
hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasZhbmvStridedBatched(handle, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy, stridey, batchCount); } // hbmv_strided_batched_64 hipblasStatus_t hipblasChbmvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA, const hipblasComplex* x, int64_t incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int64_t incy, hipblasStride stridey, int64_t batchCount) { return hipblasChbmvStridedBatched_64(handle, uplo, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)x, incx, stridex, (const hipComplex*)beta, (hipComplex*)y, incy, stridey, batchCount); } hipblasStatus_t hipblasZhbmvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey, int64_t batchCount) { return hipblasZhbmvStridedBatched_64(handle, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy, stridey, batchCount); } // hemv hipblasStatus_t hipblasChemvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* 
x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasChemv(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)x, incx, (const hipComplex*)beta, (hipComplex*)y, incy); } hipblasStatus_t hipblasZhemvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZhemv(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy); } // hemv_64 hipblasStatus_t hipblasChemvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* x, int64_t incx, const hipblasComplex* beta, hipblasComplex* y, int64_t incy) { return hipblasChemv_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)x, incx, (const hipComplex*)beta, (hipComplex*)y, incy); } hipblasStatus_t hipblasZhemvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy) { return hipblasZhemv_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy); } // hemv_batched hipblasStatus_t hipblasChemvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batch_count) { return hipblasChemvBatched(handle, 
uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)x, incx, (const hipComplex*)beta, (hipComplex* const*)y, incy, batch_count); } hipblasStatus_t hipblasZhemvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batch_count) { return hipblasZhemvBatched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)y, incy, batch_count); } // hemv_batched_64 hipblasStatus_t hipblasChemvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const x[], int64_t incx, const hipblasComplex* beta, hipblasComplex* const y[], int64_t incy, int64_t batch_count) { return hipblasChemvBatched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)x, incx, (const hipComplex*)beta, (hipComplex* const*)y, incy, batch_count); } hipblasStatus_t hipblasZhemvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, const hipblasDoubleComplex* const x[], int64_t incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int64_t incy, int64_t batch_count) { return hipblasZhemvBatched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)y, incy, batch_count); } // hemv_strided_batched hipblasStatus_t hipblasChemvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, 
const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride stride_a, const hipblasComplex* x, int incx, hipblasStride stride_x, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasChemvStridedBatched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, stride_a, (const hipComplex*)x, incx, stride_x, (const hipComplex*)beta, (hipComplex*)y, incy, stride_y, batch_count); } hipblasStatus_t hipblasZhemvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stride_y, int batch_count) { return hipblasZhemvStridedBatched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, stride_a, (const hipDoubleComplex*)x, incx, stride_x, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy, stride_y, batch_count); } // hemv_strided_batched_64 hipblasStatus_t hipblasChemvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride stride_a, const hipblasComplex* x, int64_t incx, hipblasStride stride_x, const hipblasComplex* beta, hipblasComplex* y, int64_t incy, hipblasStride stride_y, int64_t batch_count) { return hipblasChemvStridedBatched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, stride_a, (const hipComplex*)x, incx, stride_x, (const hipComplex*)beta, (hipComplex*)y, incy, stride_y, batch_count); } hipblasStatus_t hipblasZhemvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride stride_a, const hipblasDoubleComplex* x, int64_t incx, 
hipblasStride stride_x, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy, hipblasStride stride_y, int64_t batch_count) { return hipblasZhemvStridedBatched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, stride_a, (const hipDoubleComplex*)x, incx, stride_x, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy, stride_y, batch_count); } // her hipblasStatus_t hipblasCherCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasComplex* A, int lda) { return hipblasCher(handle, uplo, n, alpha, (const hipComplex*)x, incx, (hipComplex*)A, lda); } hipblasStatus_t hipblasZherCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* A, int lda) { return hipblasZher( handle, uplo, n, alpha, (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)A, lda); } // her_batched hipblasStatus_t hipblasCherBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const A[], int lda, int batchCount) { return hipblasCherBatched(handle, uplo, n, alpha, (const hipComplex* const*)x, incx, (hipComplex* const*)A, lda, batchCount); } hipblasStatus_t hipblasZherBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const A[], int lda, int batchCount) { return hipblasZherBatched(handle, uplo, n, alpha, (const hipDoubleComplex* const*)x, incx, (hipDoubleComplex* const*)A, lda, batchCount); } // her_strided_batched hipblasStatus_t hipblasCherStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasCherStridedBatched(handle, 
uplo, n, alpha, (const hipComplex*)x, incx, stridex, (hipComplex*)A, lda, strideA, batchCount); } hipblasStatus_t hipblasZherStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasZherStridedBatched(handle, uplo, n, alpha, (const hipDoubleComplex*)x, incx, stridex, (hipDoubleComplex*)A, lda, strideA, batchCount); } // her_64 hipblasStatus_t hipblasCherCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha, const hipblasComplex* x, int64_t incx, hipblasComplex* A, int64_t lda) { return hipblasCher_64(handle, uplo, n, alpha, (const hipComplex*)x, incx, (hipComplex*)A, lda); } hipblasStatus_t hipblasZherCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* A, int64_t lda) { return hipblasZher_64( handle, uplo, n, alpha, (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)A, lda); } // her_batched_64 hipblasStatus_t hipblasCherBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha, const hipblasComplex* const x[], int64_t incx, hipblasComplex* const A[], int64_t lda, int64_t batchCount) { return hipblasCherBatched_64(handle, uplo, n, alpha, (const hipComplex* const*)x, incx, (hipComplex* const*)A, lda, batchCount); } hipblasStatus_t hipblasZherBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const hipblasDoubleComplex* const x[], int64_t incx, hipblasDoubleComplex* const A[], int64_t lda, int64_t batchCount) { return hipblasZherBatched_64(handle, uplo, n, alpha, (const hipDoubleComplex* const*)x, incx, (hipDoubleComplex* const*)A, lda, batchCount); } // her_strided_batched_64 hipblasStatus_t hipblasCherStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t 
uplo, int64_t n, const float* alpha, const hipblasComplex* x, int64_t incx, hipblasStride stridex, hipblasComplex* A, int64_t lda, hipblasStride strideA, int64_t batchCount) { return hipblasCherStridedBatched_64(handle, uplo, n, alpha, (const hipComplex*)x, incx, stridex, (hipComplex*)A, lda, strideA, batchCount); } hipblasStatus_t hipblasZherStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha, const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, int64_t batchCount) { return hipblasZherStridedBatched_64(handle, uplo, n, alpha, (const hipDoubleComplex*)x, incx, stridex, (hipDoubleComplex*)A, lda, strideA, batchCount); } // her2 hipblasStatus_t hipblasCher2Cast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda) { return hipblasCher2(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)A, lda); } hipblasStatus_t hipblasZher2Cast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { return hipblasZher2(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, (const hipDoubleComplex*)y, incy, (hipDoubleComplex*)A, lda); } // her2_batched hipblasStatus_t hipblasCher2BatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const A[], int lda, int batchCount) { return hipblasCher2Batched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (const hipComplex* const*)y, incy, (hipComplex* const*)A, lda, batchCount); } 
// Batched her2 wrapper (double-precision complex), int interface.
// Like every wrapper in this group, it casts the hipblasDoubleComplex / hipblasComplex pointer
// arguments to the hipDoubleComplex / hipComplex spelling and forwards to the hipBLAS entry point
// of the same name without the "Cast" infix.
hipblasStatus_t hipblasZher2BatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx,
    const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const A[], int lda,
    int batchCount)
{
    auto alphaHip = (const hipDoubleComplex*)alpha;
    auto xHip     = (const hipDoubleComplex* const*)x;
    auto yHip     = (const hipDoubleComplex* const*)y;
    auto AHip     = (hipDoubleComplex* const*)A;
    return hipblasZher2Batched(
        handle, uplo, n, alphaHip, xHip, incx, yHip, incy, AHip, lda, batchCount);
}

// her2_strided_batched
// Strided-batched her2 wrappers, int interface.
hipblasStatus_t hipblasCher2StridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx,
    hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey,
    hipblasComplex* A, int lda, hipblasStride strideA, int batchCount)
{
    auto alphaHip = (const hipComplex*)alpha;
    auto xHip     = (const hipComplex*)x;
    auto yHip     = (const hipComplex*)y;
    auto AHip     = (hipComplex*)A;
    return hipblasCher2StridedBatched(handle, uplo, n, alphaHip, xHip, incx, stridex, yHip, incy,
        stridey, AHip, lda, strideA, batchCount);
}

hipblasStatus_t hipblasZher2StridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx,
    hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey,
    hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount)
{
    auto alphaHip = (const hipDoubleComplex*)alpha;
    auto xHip     = (const hipDoubleComplex*)x;
    auto yHip     = (const hipDoubleComplex*)y;
    auto AHip     = (hipDoubleComplex*)A;
    return hipblasZher2StridedBatched(handle, uplo, n, alphaHip, xHip, incx, stridex, yHip, incy,
        stridey, AHip, lda, strideA, batchCount);
}

// her2_64
// her2 wrappers, 64-bit integer interface.
hipblasStatus_t hipblasCher2Cast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n,
    const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, const hipblasComplex* y,
    int64_t incy, hipblasComplex* A, int64_t lda)
{
    auto alphaHip = (const hipComplex*)alpha;
    auto xHip     = (const hipComplex*)x;
    auto yHip     = (const hipComplex*)y;
    auto AHip     = (hipComplex*)A;
    return hipblasCher2_64(handle, uplo, n, alphaHip, xHip, incx, yHip, incy, AHip, lda);
}

hipblasStatus_t hipblasZher2Cast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int64_t incx,
    const hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* A, int64_t lda)
{
    auto alphaHip = (const hipDoubleComplex*)alpha;
    auto xHip     = (const hipDoubleComplex*)x;
    auto yHip     = (const hipDoubleComplex*)y;
    auto AHip     = (hipDoubleComplex*)A;
    return hipblasZher2_64(handle, uplo, n, alphaHip, xHip, incx, yHip, incy, AHip, lda);
}

// her2_batched_64
// Batched her2 wrappers, 64-bit integer interface.
hipblasStatus_t hipblasCher2BatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    int64_t n, const hipblasComplex* alpha, const hipblasComplex* const x[], int64_t incx,
    const hipblasComplex* const y[], int64_t incy, hipblasComplex* const A[], int64_t lda,
    int64_t batchCount)
{
    auto alphaHip = (const hipComplex*)alpha;
    auto xHip     = (const hipComplex* const*)x;
    auto yHip     = (const hipComplex* const*)y;
    auto AHip     = (hipComplex* const*)A;
    return hipblasCher2Batched_64(
        handle, uplo, n, alphaHip, xHip, incx, yHip, incy, AHip, lda, batchCount);
}

hipblasStatus_t hipblasZher2BatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[],
    int64_t incx, const hipblasDoubleComplex* const y[], int64_t incy,
    hipblasDoubleComplex* const A[], int64_t lda, int64_t batchCount)
{
    auto alphaHip = (const hipDoubleComplex*)alpha;
    auto xHip     = (const hipDoubleComplex* const*)x;
    auto yHip     = (const hipDoubleComplex* const*)y;
    auto AHip     = (hipDoubleComplex* const*)A;
    return hipblasZher2Batched_64(
        handle, uplo, n, alphaHip, xHip, incx, yHip, incy, AHip, lda, batchCount);
}

// her2_strided_batched_64
// Strided-batched her2 wrapper (single-precision complex), 64-bit integer interface.
hipblasStatus_t hipblasCher2StridedBatchedCast_64(hipblasHandle_t handle,
    hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x,
    int64_t incx, hipblasStride stridex, const hipblasComplex* y, int64_t incy,
    hipblasStride stridey, hipblasComplex* A, int64_t lda, hipblasStride strideA,
    int64_t batchCount)
{
    auto alphaHip = (const hipComplex*)alpha;
    auto xHip     = (const hipComplex*)x;
    auto yHip     = (const hipComplex*)y;
    auto AHip     = (hipComplex*)A;
    return hipblasCher2StridedBatched_64(handle, uplo, n, alphaHip, xHip, incx, stridex, yHip,
        incy, stridey, AHip, lda, strideA, batchCount);
}
// her2_strided_batched_64 (double-complex variant): the hipblasDoubleComplex pointer arguments
// are cast to hipDoubleComplex and everything is forwarded to hipblasZher2StridedBatched_64.
hipblasStatus_t hipblasZher2StridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    const hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey, hipblasDoubleComplex* A,
    int64_t lda, hipblasStride strideA, int64_t batchCount)
{
    using hip_t = hipDoubleComplex;
    return hipblasZher2StridedBatched_64(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)x, incx, stridex, (const hip_t*)y,
        incy, stridey, (hip_t*)A, lda, strideA, batchCount);
}

// hpmv
hipblasStatus_t hipblasChpmvCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha,
    const hipblasComplex* AP, const hipblasComplex* x, int incx, const hipblasComplex* beta,
    hipblasComplex* y, int incy)
{
    using hip_t = hipComplex;
    return hipblasChpmv(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)AP, (const hip_t*)x, incx,
        (const hip_t*)beta, (hip_t*)y, incy);
}

hipblasStatus_t hipblasZhpmvCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* AP, const hipblasDoubleComplex* x, int incx,
    const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhpmv(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)AP, (const hip_t*)x, incx,
        (const hip_t*)beta, (hip_t*)y, incy);
}

// hpmv_64
hipblasStatus_t hipblasChpmvCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha,
    const hipblasComplex* AP, const hipblasComplex* x, int64_t incx, const hipblasComplex* beta,
    hipblasComplex* y, int64_t incy)
{
    using hip_t = hipComplex;
    return hipblasChpmv_64(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)AP, (const hip_t*)x, incx,
        (const hip_t*)beta, (hip_t*)y, incy);
}

hipblasStatus_t hipblasZhpmvCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* AP, const hipblasDoubleComplex* x, int64_t incx,
    const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhpmv_64(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)AP, (const hip_t*)x, incx,
        (const hip_t*)beta, (hip_t*)y, incy);
}

// hpmv_batched
hipblasStatus_t hipblasChpmvBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha,
    const hipblasComplex* const AP[], const hipblasComplex* const x[], int incx,
    const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batchCount)
{
    using hip_t = hipComplex;
    return hipblasChpmvBatched(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t* const*)AP, (const hip_t* const*)x,
        incx, (const hip_t*)beta, (hip_t* const*)y, incy, batchCount);
}

hipblasStatus_t hipblasZhpmvBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* const AP[], const hipblasDoubleComplex* const x[], int incx,
    const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batchCount)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhpmvBatched(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t* const*)AP, (const hip_t* const*)x,
        incx, (const hip_t*)beta, (hip_t* const*)y, incy, batchCount);
}

// hpmv_batched_64
hipblasStatus_t hipblasChpmvBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha,
    const hipblasComplex* const AP[], const hipblasComplex* const x[], int64_t incx,
    const hipblasComplex* beta, hipblasComplex* const y[], int64_t incy, int64_t batchCount)
{
    using hip_t = hipComplex;
    return hipblasChpmvBatched_64(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t* const*)AP, (const hip_t* const*)x,
        incx, (const hip_t*)beta, (hip_t* const*)y, incy, batchCount);
}

hipblasStatus_t hipblasZhpmvBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* const AP[], const hipblasDoubleComplex* const x[], int64_t incx,
    const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int64_t incy,
    int64_t batchCount)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhpmvBatched_64(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t* const*)AP, (const hip_t* const*)x,
        incx, (const hip_t*)beta, (hip_t* const*)y, incy, batchCount);
}

// hpmv_strided_batched
hipblasStatus_t hipblasChpmvStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha,
    const hipblasComplex* AP, hipblasStride strideAP, const hipblasComplex* x, int incx,
    hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy,
    hipblasStride stridey, int batchCount)
{
    using hip_t = hipComplex;
    return hipblasChpmvStridedBatched(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)AP, strideAP, (const hip_t*)x, incx,
        stridex, (const hip_t*)beta, (hip_t*)y, incy, stridey, batchCount);
}

hipblasStatus_t hipblasZhpmvStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* AP, hipblasStride strideAP, const hipblasDoubleComplex* x,
    int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y,
    int incy, hipblasStride stridey, int batchCount)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhpmvStridedBatched(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)AP, strideAP, (const hip_t*)x, incx,
        stridex, (const hip_t*)beta, (hip_t*)y, incy, stridey, batchCount);
}

// hpmv_strided_batched_64
hipblasStatus_t hipblasChpmvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha,
    const hipblasComplex* AP, hipblasStride strideAP, const hipblasComplex* x, int64_t incx,
    hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int64_t incy,
    hipblasStride stridey, int64_t batchCount)
{
    using hip_t = hipComplex;
    return hipblasChpmvStridedBatched_64(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)AP, strideAP, (const hip_t*)x, incx,
        stridex, (const hip_t*)beta, (hip_t*)y, incy, stridey, batchCount);
}

hipblasStatus_t hipblasZhpmvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* AP, hipblasStride strideAP, const hipblasDoubleComplex* x,
    int64_t incx, hipblasStride stridex, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey, int64_t batchCount)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhpmvStridedBatched_64(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)AP, strideAP, (const hip_t*)x, incx,
        stridex, (const hip_t*)beta, (hip_t*)y, incy, stridey, batchCount);
}

// hpr (alpha is a real scalar pointer and is passed through without a cast)
hipblasStatus_t hipblasChprCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha,
    const hipblasComplex* x, int incx, hipblasComplex* AP)
{
    using hip_t = hipComplex;
    return hipblasChpr(handle, uplo, n, alpha, (const hip_t*)x, incx, (hip_t*)AP);
}

hipblasStatus_t hipblasZhprCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha,
    const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhpr(handle, uplo, n, alpha, (const hip_t*)x, incx, (hip_t*)AP);
}

// hpr_64
hipblasStatus_t hipblasChprCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha,
    const hipblasComplex* x, int64_t incx, hipblasComplex* AP)
{
    using hip_t = hipComplex;
    return hipblasChpr_64(handle, uplo, n, alpha, (const hip_t*)x, incx, (hip_t*)AP);
}

hipblasStatus_t hipblasZhprCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha,
    const hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* AP)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhpr_64(handle, uplo, n, alpha, (const hip_t*)x, incx, (hip_t*)AP);
}

// hpr_batched
hipblasStatus_t hipblasChprBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha,
    const hipblasComplex* const x[], int incx, hipblasComplex* const AP[], int batchCount)
{
    using hip_t = hipComplex;
    return hipblasChprBatched(
        handle, uplo, n, alpha, (const hip_t* const*)x, incx, (hip_t* const*)AP, batchCount);
}

hipblasStatus_t hipblasZhprBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha,
    const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const AP[],
    int batchCount)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhprBatched(
        handle, uplo, n, alpha, (const hip_t* const*)x, incx, (hip_t* const*)AP, batchCount);
}

// hpr_batched_64
hipblasStatus_t hipblasChprBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha,
    const hipblasComplex* const x[], int64_t incx, hipblasComplex* const AP[],
    int64_t batchCount)
{
    using hip_t = hipComplex;
    return hipblasChprBatched_64(
        handle, uplo, n, alpha, (const hip_t* const*)x, incx, (hip_t* const*)AP, batchCount);
}

hipblasStatus_t hipblasZhprBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha,
    const hipblasDoubleComplex* const x[], int64_t incx, hipblasDoubleComplex* const AP[],
    int64_t batchCount)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhprBatched_64(
        handle, uplo, n, alpha, (const hip_t* const*)x, incx, (hip_t* const*)AP, batchCount);
}

// hpr_strided_batched
hipblasStatus_t hipblasChprStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float* alpha,
    const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* AP,
    hipblasStride strideAP, int batchCount)
{
    using hip_t = hipComplex;
    return hipblasChprStridedBatched(
        handle, uplo, n, alpha, (const hip_t*)x, incx, stridex, (hip_t*)AP, strideAP,
        batchCount);
}

hipblasStatus_t hipblasZhprStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double* alpha,
    const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* AP,
    hipblasStride strideAP, int batchCount)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhprStridedBatched(
        handle, uplo, n, alpha, (const hip_t*)x, incx, stridex, (hip_t*)AP, strideAP,
        batchCount);
}

// hpr_strided_batched_64
hipblasStatus_t hipblasChprStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const float* alpha,
    const hipblasComplex* x, int64_t incx, hipblasStride stridex, hipblasComplex* AP,
    hipblasStride strideAP, int64_t batchCount)
{
    using hip_t = hipComplex;
    return hipblasChprStridedBatched_64(
        handle, uplo, n, alpha, (const hip_t*)x, incx, stridex, (hip_t*)AP, strideAP,
        batchCount);
}

hipblasStatus_t hipblasZhprStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const double* alpha,
    const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, hipblasDoubleComplex* AP,
    hipblasStride strideAP, int64_t batchCount)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhprStridedBatched_64(
        handle, uplo, n, alpha, (const hip_t*)x, incx, stridex, (hip_t*)AP, strideAP,
        batchCount);
}

// hpr2
hipblasStatus_t hipblasChpr2Cast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha,
    const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* AP)
{
    using hip_t = hipComplex;
    return hipblasChpr2(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)x, incx, (const hip_t*)y, incy,
        (hip_t*)AP);
}

hipblasStatus_t hipblasZhpr2Cast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy,
    hipblasDoubleComplex* AP)
{
    using hip_t = hipDoubleComplex;
    return hipblasZhpr2(
        handle, uplo, n, (const hip_t*)alpha, (const hip_t*)x, incx, (const hip_t*)y, incy,
        (hip_t*)AP);
}

// hpr2_64
hipblasStatus_t hipblasChpr2Cast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, const hipblasComplex* y, int64_t incy, hipblasComplex* AP) { return hipblasChpr2_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)AP); } hipblasStatus_t hipblasZhpr2Cast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* y, int64_t incy, hipblasDoubleComplex* AP) { return hipblasZhpr2_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, (const hipDoubleComplex*)y, incy, (hipDoubleComplex*)AP); } // hpr2_batched hipblasStatus_t hipblasChpr2BatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, const hipblasComplex* const y[], int incy, hipblasComplex* const AP[], int batchCount) { return hipblasChpr2Batched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (const hipComplex* const*)y, incy, (hipComplex* const*)AP, batchCount); } hipblasStatus_t hipblasZhpr2BatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* const y[], int incy, hipblasDoubleComplex* const AP[], int batchCount) { return hipblasZhpr2Batched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex* const*)y, incy, (hipDoubleComplex* const*)AP, batchCount); } // hpr2_batched_64 hipblasStatus_t hipblasChpr2BatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* const x[], int64_t incx, const hipblasComplex* const y[], int64_t incy, hipblasComplex* const AP[], int64_t 
batchCount) { return hipblasChpr2Batched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (const hipComplex* const*)y, incy, (hipComplex* const*)AP, batchCount); } hipblasStatus_t hipblasZhpr2BatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int64_t incx, const hipblasDoubleComplex* const y[], int64_t incy, hipblasDoubleComplex* const AP[], int64_t batchCount) { return hipblasZhpr2Batched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex* const*)y, incy, (hipDoubleComplex* const*)AP, batchCount); } // hpr2_strided_batched hipblasStatus_t hipblasChpr2StridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasChpr2StridedBatched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (const hipComplex*)y, incy, stridey, (hipComplex*)AP, strideAP, batchCount); } hipblasStatus_t hipblasZhpr2StridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasZhpr2StridedBatched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)y, incy, stridey, (hipDoubleComplex*)AP, strideAP, batchCount); } // hpr2_strided_batched_64 hipblasStatus_t hipblasChpr2StridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, hipblasStride 
stridex, const hipblasComplex* y, int64_t incy, hipblasStride stridey, hipblasComplex* AP, hipblasStride strideAP, int64_t batchCount) { return hipblasChpr2StridedBatched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (const hipComplex*)y, incy, stridey, (hipComplex*)AP, strideAP, batchCount); } hipblasStatus_t hipblasZhpr2StridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, const hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey, hipblasDoubleComplex* AP, hipblasStride strideAP, int64_t batchCount) { return hipblasZhpr2StridedBatched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)y, incy, stridey, (hipDoubleComplex*)AP, strideAP, batchCount); } // sbmv, spmv, spr2 no complex versions // spr hipblasStatus_t hipblasCsprCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* AP) { return hipblasCspr( handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (hipComplex*)AP); } hipblasStatus_t hipblasZsprCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* AP) { return hipblasZspr(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)AP); } // spr_64 hipblasStatus_t hipblasCsprCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, hipblasComplex* AP) { return hipblasCspr_64( handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (hipComplex*)AP); } hipblasStatus_t hipblasZsprCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const 
hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* AP) { return hipblasZspr_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)AP); } // spr_batched hipblasStatus_t hipblasCsprBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const AP[], int batchCount) { return hipblasCsprBatched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (hipComplex* const*)AP, batchCount); } hipblasStatus_t hipblasZsprBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const AP[], int batchCount) { return hipblasZsprBatched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (hipDoubleComplex* const*)AP, batchCount); } // spr_batched_64 hipblasStatus_t hipblasCsprBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* const x[], int64_t incx, hipblasComplex* const AP[], int64_t batchCount) { return hipblasCsprBatched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (hipComplex* const*)AP, batchCount); } hipblasStatus_t hipblasZsprBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int64_t incx, hipblasDoubleComplex* const AP[], int64_t batchCount) { return hipblasZsprBatched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (hipDoubleComplex* const*)AP, batchCount); } // spr_strided_batched hipblasStatus_t hipblasCsprStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* AP, 
hipblasStride strideAP, int batchCount) { return hipblasCsprStridedBatched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (hipComplex*)AP, strideAP, batchCount); } hipblasStatus_t hipblasZsprStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* AP, hipblasStride strideAP, int batchCount) { return hipblasZsprStridedBatched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (hipDoubleComplex*)AP, strideAP, batchCount); } // spr_strided_batched_64 hipblasStatus_t hipblasCsprStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, hipblasStride stridex, hipblasComplex* AP, hipblasStride strideAP, int64_t batchCount) { return hipblasCsprStridedBatched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (hipComplex*)AP, strideAP, batchCount); } hipblasStatus_t hipblasZsprStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, hipblasDoubleComplex* AP, hipblasStride strideAP, int64_t batchCount) { return hipblasZsprStridedBatched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (hipDoubleComplex*)AP, strideAP, batchCount); } // symv hipblasStatus_t hipblasCsymvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, const hipblasComplex* beta, hipblasComplex* y, int incy) { return hipblasCsymv(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)x, incx, (const hipComplex*)beta, (hipComplex*)y, incy); } hipblasStatus_t 
hipblasZsymvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy) { return hipblasZsymv(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy); } // symv_64 hipblasStatus_t hipblasCsymvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* x, int64_t incx, const hipblasComplex* beta, hipblasComplex* y, int64_t incy) { return hipblasCsymv_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)x, incx, (const hipComplex*)beta, (hipComplex*)y, incy); } hipblasStatus_t hipblasZsymvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy) { return hipblasZsymv_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy); } // symv_batched hipblasStatus_t hipblasCsymvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx, const hipblasComplex* beta, hipblasComplex* const y[], int incy, int batchCount) { return hipblasCsymvBatched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)x, incx, (const hipComplex*)beta, (hipComplex* const*)y, incy, batchCount); } hipblasStatus_t hipblasZsymvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t 
uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const x[], int incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int incy, int batchCount) { return hipblasZsymvBatched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)y, incy, batchCount); } // symv_batched_64 hipblasStatus_t hipblasCsymvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const x[], int64_t incx, const hipblasComplex* beta, hipblasComplex* const y[], int64_t incy, int64_t batchCount) { return hipblasCsymvBatched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)x, incx, (const hipComplex*)beta, (hipComplex* const*)y, incy, batchCount); } hipblasStatus_t hipblasZsymvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, const hipblasDoubleComplex* const x[], int64_t incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const y[], int64_t incy, int64_t batchCount) { return hipblasZsymvBatched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)y, incy, batchCount); } // symv_strided_batched hipblasStatus_t hipblasCsymvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int incy, hipblasStride stridey, int batchCount) { return 
hipblasCsymvStridedBatched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)x, incx, stridex, (const hipComplex*)beta, (hipComplex*)y, incy, stridey, batchCount); } hipblasStatus_t hipblasZsymvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int incy, hipblasStride stridey, int batchCount) { return hipblasZsymvStridedBatched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy, stridey, batchCount); } // symv_strided_batched_64 hipblasStatus_t hipblasCsymvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA, const hipblasComplex* x, int64_t incx, hipblasStride stridex, const hipblasComplex* beta, hipblasComplex* y, int64_t incy, hipblasStride stridey, int64_t batchCount) { return hipblasCsymvStridedBatched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)x, incx, stridex, (const hipComplex*)beta, (hipComplex*)y, incy, stridey, batchCount); } hipblasStatus_t hipblasZsymvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey, int64_t batchCount) { return hipblasZsymvStridedBatched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const 
hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)beta, (hipDoubleComplex*)y, incy, stridey, batchCount); } // syr hipblasStatus_t hipblasCsyrCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasComplex* A, int lda) { return hipblasCsyr( handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (hipComplex*)A, lda); } hipblasStatus_t hipblasZsyrCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasDoubleComplex* A, int lda) { return hipblasZsyr(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)A, lda); } // syr_64 hipblasStatus_t hipblasCsyrCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, hipblasComplex* A, int64_t lda) { return hipblasCsyr_64( handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (hipComplex*)A, lda); } hipblasStatus_t hipblasZsyrCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int64_t incx, hipblasDoubleComplex* A, int64_t lda) { return hipblasZsyr_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)A, lda); } // syr_batched hipblasStatus_t hipblasCsyrBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* const x[], int incx, hipblasComplex* const A[], int lda, int batch_count) { return hipblasCsyrBatched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (hipComplex* const*)A, lda, batch_count); } hipblasStatus_t hipblasZsyrBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int incx, 
hipblasDoubleComplex* const A[], int lda, int batch_count) { return hipblasZsyrBatched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (hipDoubleComplex* const*)A, lda, batch_count); } // syr_batched_64 hipblasStatus_t hipblasCsyrBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* const x[], int64_t incx, hipblasComplex* const A[], int64_t lda, int64_t batch_count) { return hipblasCsyrBatched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex* const*)x, incx, (hipComplex* const*)A, lda, batch_count); } hipblasStatus_t hipblasZsyrBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int64_t incx, hipblasDoubleComplex* const A[], int64_t lda, int64_t batch_count) { return hipblasZsyrBatched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (hipDoubleComplex* const*)A, lda, batch_count); } // syr_strided_batched hipblasStatus_t hipblasCsyrStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, hipblasComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasCsyrStridedBatched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (hipComplex*)A, lda, strideA, batch_count); } hipblasStatus_t hipblasZsyrStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batch_count) { return hipblasZsyrStridedBatched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (hipDoubleComplex*)A, lda, strideA, batch_count); } // syr_strided_batched_64 hipblasStatus_t 
hipblasCsyrStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, hipblasStride stridex, hipblasComplex* A, int64_t lda, hipblasStride strideA, int64_t batch_count) { return hipblasCsyrStridedBatched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (hipComplex*)A, lda, strideA, batch_count); } hipblasStatus_t hipblasZsyrStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, int64_t batch_count) { return hipblasZsyrStridedBatched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (hipDoubleComplex*)A, lda, strideA, batch_count); } // syr2 hipblasStatus_t hipblasCsyr2Cast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, const hipblasComplex* y, int incy, hipblasComplex* A, int lda) { return hipblasCsyr2(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)A, lda); } hipblasStatus_t hipblasZsyr2Cast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, const hipblasDoubleComplex* y, int incy, hipblasDoubleComplex* A, int lda) { return hipblasZsyr2(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, (const hipDoubleComplex*)y, incy, (hipDoubleComplex*)A, lda); } // syr2 hipblasStatus_t hipblasCsyr2Cast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, const hipblasComplex* y, int64_t incy, hipblasComplex* A, int64_t lda) { return hipblasCsyr2_64(handle, uplo, n, (const hipComplex*)alpha, (const 
hipComplex*)x, incx, (const hipComplex*)y, incy, (hipComplex*)A, lda);
}

// Casts the double-complex arguments to hipDoubleComplex pointers and forwards
// everything to hipblasZsyr2_64.
hipblasStatus_t hipblasZsyr2Cast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n,
    const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* x, int64_t incx,
    const hipblasDoubleComplex* y, int64_t incy,
    hipblasDoubleComplex* A, int64_t lda)
{
    const hipDoubleComplex* hipAlpha = (const hipDoubleComplex*)alpha;
    const hipDoubleComplex* hipX = (const hipDoubleComplex*)x;
    const hipDoubleComplex* hipY = (const hipDoubleComplex*)y;
    hipDoubleComplex* hipA = (hipDoubleComplex*)A;
    return hipblasZsyr2_64(handle, uplo, n, hipAlpha, hipX, incx, hipY, incy, hipA, lda);
}

// syr2_batched
// Cast the complex arguments to the HIP complex pointer types and forward
// everything to hipblas{C,Z}syr2Batched.
hipblasStatus_t hipblasCsyr2BatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n,
    const hipblasComplex* alpha,
    const hipblasComplex* const x[], int incx,
    const hipblasComplex* const y[], int incy,
    hipblasComplex* const A[], int lda,
    int batchCount)
{
    const hipComplex* hipAlpha = (const hipComplex*)alpha;
    const hipComplex* const* hipX = (const hipComplex* const*)x;
    const hipComplex* const* hipY = (const hipComplex* const*)y;
    hipComplex* const* hipA = (hipComplex* const*)A;
    return hipblasCsyr2Batched(
        handle, uplo, n, hipAlpha, hipX, incx, hipY, incy, hipA, lda, batchCount);
}

hipblasStatus_t hipblasZsyr2BatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int n,
    const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* const x[], int incx,
    const hipblasDoubleComplex* const y[], int incy,
    hipblasDoubleComplex* const A[], int lda,
    int batchCount)
{
    const hipDoubleComplex* hipAlpha = (const hipDoubleComplex*)alpha;
    const hipDoubleComplex* const* hipX = (const hipDoubleComplex* const*)x;
    const hipDoubleComplex* const* hipY = (const hipDoubleComplex* const*)y;
    hipDoubleComplex* const* hipA = (hipDoubleComplex* const*)A;
    return hipblasZsyr2Batched(
        handle, uplo, n, hipAlpha, hipX, incx, hipY, incy, hipA, lda, batchCount);
}

// syr2_batched_64
// Casts the complex arguments to hipComplex pointers and forwards everything to
// hipblasCsyr2Batched_64.
hipblasStatus_t hipblasCsyr2BatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n,
    const hipblasComplex* alpha,
    const hipblasComplex* const x[], int64_t incx,
    const hipblasComplex* const y[], int64_t incy,
    hipblasComplex* const A[], int64_t lda,
    int64_t batchCount)
{
    const hipComplex* hipAlpha = (const hipComplex*)alpha;
    const hipComplex* const* hipX = (const hipComplex* const*)x;
    const hipComplex* const* hipY = (const hipComplex* const*)y;
    hipComplex* const* hipA = (hipComplex* const*)A;
    return hipblasCsyr2Batched_64(
        handle, uplo, n, hipAlpha, hipX, incx, hipY, incy, hipA, lda, batchCount);
}

hipblasStatus_t
hipblasZsyr2BatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const x[], int64_t incx, const hipblasDoubleComplex* const y[], int64_t incy, hipblasDoubleComplex* const A[], int64_t lda, int64_t batchCount) { return hipblasZsyr2Batched_64(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)x, incx, (const hipDoubleComplex* const*)y, incy, (hipDoubleComplex* const*)A, lda, batchCount); } // syr2_strided_batched hipblasStatus_t hipblasCsyr2StridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex* alpha, const hipblasComplex* x, int incx, hipblasStride stridex, const hipblasComplex* y, int incy, hipblasStride stridey, hipblasComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasCsyr2StridedBatched(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x, incx, stridex, (const hipComplex*)y, incy, stridey, (hipComplex*)A, lda, strideA, batchCount); } hipblasStatus_t hipblasZsyr2StridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* x, int incx, hipblasStride stridex, const hipblasDoubleComplex* y, int incy, hipblasStride stridey, hipblasDoubleComplex* A, int lda, hipblasStride strideA, int batchCount) { return hipblasZsyr2StridedBatched(handle, uplo, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)x, incx, stridex, (const hipDoubleComplex*)y, incy, stridey, (hipDoubleComplex*)A, lda, strideA, batchCount); } // syr2_strided_batched_64 hipblasStatus_t hipblasCsyr2StridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n, const hipblasComplex* alpha, const hipblasComplex* x, int64_t incx, hipblasStride stridex, const hipblasComplex* y, int64_t incy, hipblasStride stridey, hipblasComplex* A, int64_t lda, hipblasStride strideA, int64_t batchCount) { return 
hipblasCsyr2StridedBatched_64(handle, uplo, n, (const hipComplex*)alpha, (const hipComplex*)x,
                              incx, stridex, (const hipComplex*)y, incy, stridey,
                              (hipComplex*)A, lda, strideA, batchCount);
}

// Casts the double-complex arguments to hipDoubleComplex pointers and forwards
// everything to hipblasZsyr2StridedBatched_64.
hipblasStatus_t hipblasZsyr2StridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo, int64_t n,
    const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    const hipblasDoubleComplex* y, int64_t incy, hipblasStride stridey,
    hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA,
    int64_t batchCount)
{
    const hipDoubleComplex* hipAlpha = (const hipDoubleComplex*)alpha;
    const hipDoubleComplex* hipX = (const hipDoubleComplex*)x;
    const hipDoubleComplex* hipY = (const hipDoubleComplex*)y;
    hipDoubleComplex* hipA = (hipDoubleComplex*)A;
    return hipblasZsyr2StridedBatched_64(handle, uplo, n, hipAlpha,
                                         hipX, incx, stridex,
                                         hipY, incy, stridey,
                                         hipA, lda, strideA,
                                         batchCount);
}

// trsv
// Cast the complex arguments to the HIP complex pointer types and forward
// everything to hipblas{C,Z}trsv.
hipblasStatus_t hipblasCtrsvCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m,
    const hipblasComplex* A, int lda,
    hipblasComplex* x, int incx)
{
    const hipComplex* hipA = (const hipComplex*)A;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtrsv(handle, uplo, transA, diag, m, hipA, lda, hipX, incx);
}

hipblasStatus_t hipblasZtrsvCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m,
    const hipblasDoubleComplex* A, int lda,
    hipblasDoubleComplex* x, int incx)
{
    const hipDoubleComplex* hipA = (const hipDoubleComplex*)A;
    hipDoubleComplex* hipX = (hipDoubleComplex*)x;
    return hipblasZtrsv(handle, uplo, transA, diag, m, hipA, lda, hipX, incx);
}

// trsv_64
// Casts the complex arguments to hipComplex pointers and forwards everything to
// hipblasCtrsv_64.
hipblasStatus_t hipblasCtrsvCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m,
    const hipblasComplex* A, int64_t lda,
    hipblasComplex* x, int64_t incx)
{
    const hipComplex* hipA = (const hipComplex*)A;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtrsv_64(handle, uplo, transA, diag, m, hipA, lda, hipX, incx);
}

hipblasStatus_t hipblasZtrsvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
                                    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m,
                                    const hipblasDoubleComplex* A, int64_t lda,
                                    hipblasDoubleComplex* x, int64_t incx)
{
return hipblasZtrsv_64( handle, uplo, transA, diag, m, (const hipDoubleComplex*)A, lda, (hipDoubleComplex*)x, incx); } // trsv_batched hipblasStatus_t hipblasCtrsvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCtrsvBatched(handle, uplo, transA, diag, m, (const hipComplex* const*)A, lda, (hipComplex* const*)x, incx, batch_count); } hipblasStatus_t hipblasZtrsvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZtrsvBatched(handle, uplo, transA, diag, m, (const hipDoubleComplex* const*)A, lda, (hipDoubleComplex* const*)x, incx, batch_count); } // trsv_batched_64 hipblasStatus_t hipblasCtrsvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasComplex* const A[], int64_t lda, hipblasComplex* const x[], int64_t incx, int64_t batch_count) { return hipblasCtrsvBatched_64(handle, uplo, transA, diag, m, (const hipComplex* const*)A, lda, (hipComplex* const*)x, incx, batch_count); } hipblasStatus_t hipblasZtrsvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasDoubleComplex* const A[], int64_t lda, hipblasDoubleComplex* const x[], int64_t incx, int64_t batch_count) { return hipblasZtrsvBatched_64(handle, uplo, transA, diag, m, (const hipDoubleComplex* const*)A, lda, (hipDoubleComplex* const*)x, incx, batch_count); } // trsv_strided_batched hipblasStatus_t hipblasCtrsvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasStride 
strideA, hipblasComplex* x, int incx, hipblasStride stridex, int batch_count)
{
    return hipblasCtrsvStridedBatched(handle, uplo, transA, diag, m, (const hipComplex*)A, lda,
                                      strideA, (hipComplex*)x, incx, stridex, batch_count);
}

// Casts the double-complex arguments to hipDoubleComplex pointers and forwards
// everything to hipblasZtrsvStridedBatched.
hipblasStatus_t hipblasZtrsvStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m,
    const hipblasDoubleComplex* A, int lda, hipblasStride strideA,
    hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    int batch_count)
{
    const hipDoubleComplex* hipA = (const hipDoubleComplex*)A;
    hipDoubleComplex* hipX = (hipDoubleComplex*)x;
    return hipblasZtrsvStridedBatched(
        handle, uplo, transA, diag, m, hipA, lda, strideA, hipX, incx, stridex, batch_count);
}

// trsv_strided_batched_64
// Cast the complex arguments to the HIP complex pointer types and forward
// everything to hipblas{C,Z}trsvStridedBatched_64.
hipblasStatus_t hipblasCtrsvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m,
    const hipblasComplex* A, int64_t lda, hipblasStride strideA,
    hipblasComplex* x, int64_t incx, hipblasStride stridex,
    int64_t batch_count)
{
    const hipComplex* hipA = (const hipComplex*)A;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtrsvStridedBatched_64(
        handle, uplo, transA, diag, m, hipA, lda, strideA, hipX, incx, stridex, batch_count);
}

hipblasStatus_t hipblasZtrsvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m,
    const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA,
    hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    int64_t batch_count)
{
    const hipDoubleComplex* hipA = (const hipDoubleComplex*)A;
    hipDoubleComplex* hipX = (hipDoubleComplex*)x;
    return hipblasZtrsvStridedBatched_64(
        handle, uplo, transA, diag, m, hipA, lda, strideA, hipX, incx, stridex, batch_count);
}

// tbmv
// Casts the complex arguments to hipComplex pointers and forwards everything to
// hipblasCtbmv.
hipblasStatus_t hipblasCtbmvCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k,
    const hipblasComplex* A, int lda,
    hipblasComplex* x, int incx)
{
    const hipComplex* hipA = (const hipComplex*)A;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtbmv(handle, uplo, transA, diag, m, k, hipA, lda, hipX, incx);
}

hipblasStatus_t
hipblasZtbmvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { return hipblasZtbmv(handle, uplo, transA, diag, m, k, (const hipDoubleComplex*)A, lda, (hipDoubleComplex*)x, incx); } // tbmv_64 hipblasStatus_t hipblasCtbmvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k, const hipblasComplex* A, int64_t lda, hipblasComplex* x, int64_t incx) { return hipblasCtbmv_64( handle, uplo, transA, diag, m, k, (const hipComplex*)A, lda, (hipComplex*)x, incx); } hipblasStatus_t hipblasZtbmvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k, const hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* x, int64_t incx) { return hipblasZtbmv_64(handle, uplo, transA, diag, m, k, (const hipDoubleComplex*)A, lda, (hipDoubleComplex*)x, incx); } // tbmv_batched hipblasStatus_t hipblasCtbmvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batch_count) { return hipblasCtbmvBatched(handle, uplo, transA, diag, m, k, (const hipComplex* const*)A, lda, (hipComplex* const*)x, incx, batch_count); } hipblasStatus_t hipblasZtbmvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batch_count) { return hipblasZtbmvBatched(handle, uplo, transA, diag, m, k, (const hipDoubleComplex* const*)A, lda, (hipDoubleComplex* const*)x, incx, batch_count); } // tbmv_batched_64 hipblasStatus_t hipblasCtbmvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, 
hipblasDiagType_t diag, int64_t m, int64_t k, const hipblasComplex* const A[], int64_t lda,
hipblasComplex* const x[], int64_t incx, int64_t batch_count)
{
    return hipblasCtbmvBatched_64(handle, uplo, transA, diag, m, k, (const hipComplex* const*)A,
                                  lda, (hipComplex* const*)x, incx, batch_count);
}

// Casts the double-complex pointer arrays to hipDoubleComplex pointer arrays and
// forwards everything to hipblasZtbmvBatched_64.
hipblasStatus_t hipblasZtbmvBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k,
    const hipblasDoubleComplex* const A[], int64_t lda,
    hipblasDoubleComplex* const x[], int64_t incx,
    int64_t batch_count)
{
    const hipDoubleComplex* const* hipA = (const hipDoubleComplex* const*)A;
    hipDoubleComplex* const* hipX = (hipDoubleComplex* const*)x;
    return hipblasZtbmvBatched_64(
        handle, uplo, transA, diag, m, k, hipA, lda, hipX, incx, batch_count);
}

// tbmv_strided_batched
// Cast the complex arguments to the HIP complex pointer types and forward
// everything to hipblas{C,Z}tbmvStridedBatched.
hipblasStatus_t hipblasCtbmvStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k,
    const hipblasComplex* A, int lda, hipblasStride stride_a,
    hipblasComplex* x, int incx, hipblasStride stride_x,
    int batch_count)
{
    const hipComplex* hipA = (const hipComplex*)A;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtbmvStridedBatched(
        handle, uplo, transA, diag, m, k, hipA, lda, stride_a, hipX, incx, stride_x, batch_count);
}

hipblasStatus_t hipblasZtbmvStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k,
    const hipblasDoubleComplex* A, int lda, hipblasStride stride_a,
    hipblasDoubleComplex* x, int incx, hipblasStride stride_x,
    int batch_count)
{
    const hipDoubleComplex* hipA = (const hipDoubleComplex*)A;
    hipDoubleComplex* hipX = (hipDoubleComplex*)x;
    return hipblasZtbmvStridedBatched(
        handle, uplo, transA, diag, m, k, hipA, lda, stride_a, hipX, incx, stride_x, batch_count);
}

// tbmv_strided_batched_64
hipblasStatus_t hipblasCtbmvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
                                                  hipblasOperation_t transA,
                                                  hipblasDiagType_t diag, int64_t m, int64_t k,
                                                  const hipblasComplex* A, int64_t lda,
                                                  hipblasStride stride_a, hipblasComplex* x,
                                                  int64_t incx, hipblasStride stride_x,
                                                  int64_t
batch_count)
{
    return hipblasCtbmvStridedBatched_64(handle, uplo, transA, diag, m, k, (const hipComplex*)A,
                                         lda, stride_a, (hipComplex*)x, incx, stride_x,
                                         batch_count);
}

// Casts the double-complex arguments to hipDoubleComplex pointers and forwards
// everything to hipblasZtbmvStridedBatched_64.
hipblasStatus_t hipblasZtbmvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t k,
    const hipblasDoubleComplex* A, int64_t lda, hipblasStride stride_a,
    hipblasDoubleComplex* x, int64_t incx, hipblasStride stride_x,
    int64_t batch_count)
{
    const hipDoubleComplex* hipA = (const hipDoubleComplex*)A;
    hipDoubleComplex* hipX = (hipDoubleComplex*)x;
    return hipblasZtbmvStridedBatched_64(
        handle, uplo, transA, diag, m, k, hipA, lda, stride_a, hipX, incx, stride_x, batch_count);
}

// tbsv
// Cast the complex arguments to the HIP complex pointer types and forward
// everything to hipblas{C,Z}tbsv.
hipblasStatus_t hipblasCtbsvCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k,
    const hipblasComplex* A, int lda,
    hipblasComplex* x, int incx)
{
    const hipComplex* hipA = (const hipComplex*)A;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtbsv(handle, uplo, transA, diag, n, k, hipA, lda, hipX, incx);
}

hipblasStatus_t hipblasZtbsvCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k,
    const hipblasDoubleComplex* A, int lda,
    hipblasDoubleComplex* x, int incx)
{
    const hipDoubleComplex* hipA = (const hipDoubleComplex*)A;
    hipDoubleComplex* hipX = (hipDoubleComplex*)x;
    return hipblasZtbsv(handle, uplo, transA, diag, n, k, hipA, lda, hipX, incx);
}

// tbsv_64
// Casts the complex arguments to hipComplex pointers and forwards everything to
// hipblasCtbsv_64.
hipblasStatus_t hipblasCtbsvCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k,
    const hipblasComplex* A, int64_t lda,
    hipblasComplex* x, int64_t incx)
{
    const hipComplex* hipA = (const hipComplex*)A;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtbsv_64(handle, uplo, transA, diag, n, k, hipA, lda, hipX, incx);
}

hipblasStatus_t hipblasZtbsvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
                                    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n,
                                    int64_t k, const hipblasDoubleComplex* A, int64_t lda,
                                    hipblasDoubleComplex* x, int64_t incx)
{
    return hipblasZtbsv_64(handle, uplo, transA, diag, n, k, (const
hipDoubleComplex*)A, lda, (hipDoubleComplex*)x, incx); } // tbsv_batched hipblasStatus_t hipblasCtbsvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex* const A[], int lda, hipblasComplex* const x[], int incx, int batchCount) { return hipblasCtbsvBatched(handle, uplo, transA, diag, n, k, (const hipComplex* const*)A, lda, (hipComplex* const*)x, incx, batchCount); } hipblasStatus_t hipblasZtbsvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const x[], int incx, int batchCount) { return hipblasZtbsvBatched(handle, uplo, transA, diag, n, k, (const hipDoubleComplex* const*)A, lda, (hipDoubleComplex* const*)x, incx, batchCount); } // tbsv_batched_64 hipblasStatus_t hipblasCtbsvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k, const hipblasComplex* const A[], int64_t lda, hipblasComplex* const x[], int64_t incx, int64_t batchCount) { return hipblasCtbsvBatched_64(handle, uplo, transA, diag, n, k, (const hipComplex* const*)A, lda, (hipComplex* const*)x, incx, batchCount); } hipblasStatus_t hipblasZtbsvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k, const hipblasDoubleComplex* const A[], int64_t lda, hipblasDoubleComplex* const x[], int64_t incx, int64_t batchCount) { return hipblasZtbsvBatched_64(handle, uplo, transA, diag, n, k, (const hipDoubleComplex* const*)A, lda, (hipDoubleComplex* const*)x, incx, batchCount); } // tbsv_strided_batched hipblasStatus_t hipblasCtbsvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex* A, int lda, hipblasStride strideA, 
hipblasComplex* x, int incx, hipblasStride stridex, int batchCount)
{
    return hipblasCtbsvStridedBatched(handle, uplo, transA, diag, n, k, (const hipComplex*)A, lda,
                                      strideA, (hipComplex*)x, incx, stridex, batchCount);
}

// Casts the double-complex arguments to hipDoubleComplex pointers and forwards
// everything to hipblasZtbsvStridedBatched.
hipblasStatus_t hipblasZtbsvStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k,
    const hipblasDoubleComplex* A, int lda, hipblasStride strideA,
    hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    int batchCount)
{
    const hipDoubleComplex* hipA = (const hipDoubleComplex*)A;
    hipDoubleComplex* hipX = (hipDoubleComplex*)x;
    return hipblasZtbsvStridedBatched(
        handle, uplo, transA, diag, n, k, hipA, lda, strideA, hipX, incx, stridex, batchCount);
}

// tbsv_strided_batched_64
// Cast the complex arguments to the HIP complex pointer types and forward
// everything to hipblas{C,Z}tbsvStridedBatched_64.
hipblasStatus_t hipblasCtbsvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k,
    const hipblasComplex* A, int64_t lda, hipblasStride strideA,
    hipblasComplex* x, int64_t incx, hipblasStride stridex,
    int64_t batchCount)
{
    const hipComplex* hipA = (const hipComplex*)A;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtbsvStridedBatched_64(
        handle, uplo, transA, diag, n, k, hipA, lda, strideA, hipX, incx, stridex, batchCount);
}

hipblasStatus_t hipblasZtbsvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t n, int64_t k,
    const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA,
    hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex,
    int64_t batchCount)
{
    const hipDoubleComplex* hipA = (const hipDoubleComplex*)A;
    hipDoubleComplex* hipX = (hipDoubleComplex*)x;
    return hipblasZtbsvStridedBatched_64(
        handle, uplo, transA, diag, n, k, hipA, lda, strideA, hipX, incx, stridex, batchCount);
}

// tpmv
// Casts the complex arguments to hipComplex pointers and forwards everything to
// hipblasCtpmv.
hipblasStatus_t hipblasCtpmvCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m,
    const hipblasComplex* AP,
    hipblasComplex* x, int incx)
{
    const hipComplex* hipAP = (const hipComplex*)AP;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtpmv(handle, uplo, transA, diag, m, hipAP, hipX, incx);
}

hipblasStatus_t
hipblasZtpmvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx) { return hipblasZtpmv( handle, uplo, transA, diag, m, (const hipDoubleComplex*)AP, (hipDoubleComplex*)x, incx); } // tpmv_64 hipblasStatus_t hipblasCtpmvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasComplex* AP, hipblasComplex* x, int64_t incx) { return hipblasCtpmv_64( handle, uplo, transA, diag, m, (const hipComplex*)AP, (hipComplex*)x, incx); } hipblasStatus_t hipblasZtpmvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int64_t incx) { return hipblasZtpmv_64( handle, uplo, transA, diag, m, (const hipDoubleComplex*)AP, (hipDoubleComplex*)x, incx); } // tpmv_batched hipblasStatus_t hipblasCtpmvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const AP[], hipblasComplex* const x[], int incx, int batchCount) { return hipblasCtpmvBatched(handle, uplo, transA, diag, m, (const hipComplex* const*)AP, (hipComplex* const*)x, incx, batchCount); } hipblasStatus_t hipblasZtpmvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* const AP[], hipblasDoubleComplex* const x[], int incx, int batchCount) { return hipblasZtpmvBatched(handle, uplo, transA, diag, m, (const hipDoubleComplex* const*)AP, (hipDoubleComplex* const*)x, incx, batchCount); } // tpmv_batched_64 hipblasStatus_t hipblasCtpmvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasComplex* const AP[], hipblasComplex* const x[], int64_t incx, int64_t batchCount) 
{
    return hipblasCtpmvBatched_64(handle, uplo, transA, diag, m, (const hipComplex* const*)AP,
                                  (hipComplex* const*)x, incx, batchCount);
}

// Casts the double-complex pointer arrays to hipDoubleComplex pointer arrays and
// forwards everything to hipblasZtpmvBatched_64.
hipblasStatus_t hipblasZtpmvBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m,
    const hipblasDoubleComplex* const AP[],
    hipblasDoubleComplex* const x[], int64_t incx,
    int64_t batchCount)
{
    const hipDoubleComplex* const* hipAP = (const hipDoubleComplex* const*)AP;
    hipDoubleComplex* const* hipX = (hipDoubleComplex* const*)x;
    return hipblasZtpmvBatched_64(handle, uplo, transA, diag, m, hipAP, hipX, incx, batchCount);
}

// tpmv_strided_batched
// Cast the complex arguments to the HIP complex pointer types and forward
// everything to hipblas{C,Z}tpmvStridedBatched.
hipblasStatus_t hipblasCtpmvStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m,
    const hipblasComplex* AP, hipblasStride strideAP,
    hipblasComplex* x, int incx, hipblasStride stridex,
    int batchCount)
{
    const hipComplex* hipAP = (const hipComplex*)AP;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtpmvStridedBatched(
        handle, uplo, transA, diag, m, hipAP, strideAP, hipX, incx, stridex, batchCount);
}

hipblasStatus_t hipblasZtpmvStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m,
    const hipblasDoubleComplex* AP, hipblasStride strideAP,
    hipblasDoubleComplex* x, int incx, hipblasStride stridex,
    int batchCount)
{
    const hipDoubleComplex* hipAP = (const hipDoubleComplex*)AP;
    hipDoubleComplex* hipX = (hipDoubleComplex*)x;
    return hipblasZtpmvStridedBatched(
        handle, uplo, transA, diag, m, hipAP, strideAP, hipX, incx, stridex, batchCount);
}

// tpmv_strided_batched_64
// Casts the complex arguments to hipComplex pointers and forwards everything to
// hipblasCtpmvStridedBatched_64.
hipblasStatus_t hipblasCtpmvStridedBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m,
    const hipblasComplex* AP, hipblasStride strideAP,
    hipblasComplex* x, int64_t incx, hipblasStride stridex,
    int64_t batchCount)
{
    const hipComplex* hipAP = (const hipComplex*)AP;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtpmvStridedBatched_64(
        handle, uplo, transA, diag, m, hipAP, strideAP, hipX, incx, stridex, batchCount);
}

hipblasStatus_t hipblasZtpmvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasDoubleComplex* AP, hipblasStride strideAP, hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, int64_t batchCount) { return hipblasZtpmvStridedBatched_64(handle, uplo, transA, diag, m, (const hipDoubleComplex*)AP, strideAP, (hipDoubleComplex*)x, incx, stridex, batchCount); } // tpsv hipblasStatus_t hipblasCtpsvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* AP, hipblasComplex* x, int incx) { return hipblasCtpsv(handle, uplo, transA, diag, m, (const hipComplex*)AP, (hipComplex*)x, incx); } hipblasStatus_t hipblasZtpsvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int incx) { return hipblasZtpsv( handle, uplo, transA, diag, m, (const hipDoubleComplex*)AP, (hipDoubleComplex*)x, incx); } // tpsv_64 hipblasStatus_t hipblasCtpsvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasComplex* AP, hipblasComplex* x, int64_t incx) { return hipblasCtpsv_64( handle, uplo, transA, diag, m, (const hipComplex*)AP, (hipComplex*)x, incx); } hipblasStatus_t hipblasZtpsvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasDoubleComplex* AP, hipblasDoubleComplex* x, int64_t incx) { return hipblasZtpsv_64( handle, uplo, transA, diag, m, (const hipDoubleComplex*)AP, (hipDoubleComplex*)x, incx); } // tpsv_batched hipblasStatus_t hipblasCtpsvBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* const AP[], hipblasComplex* const x[], int incx, int batchCount) { return hipblasCtpsvBatched(handle, uplo, transA, diag, m, (const hipComplex* const*)AP, (hipComplex* 
const*)x, incx, batchCount);
}

// Casts the double-complex pointer arrays to hipDoubleComplex pointer arrays and
// forwards everything to hipblasZtpsvBatched.
hipblasStatus_t hipblasZtpsvBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m,
    const hipblasDoubleComplex* const AP[],
    hipblasDoubleComplex* const x[], int incx,
    int batchCount)
{
    const hipDoubleComplex* const* hipAP = (const hipDoubleComplex* const*)AP;
    hipDoubleComplex* const* hipX = (hipDoubleComplex* const*)x;
    return hipblasZtpsvBatched(handle, uplo, transA, diag, m, hipAP, hipX, incx, batchCount);
}

// tpsv_batched_64
// Cast the complex pointer arrays to the HIP complex pointer types and forward
// everything to hipblas{C,Z}tpsvBatched_64.
hipblasStatus_t hipblasCtpsvBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m,
    const hipblasComplex* const AP[],
    hipblasComplex* const x[], int64_t incx,
    int64_t batchCount)
{
    const hipComplex* const* hipAP = (const hipComplex* const*)AP;
    hipComplex* const* hipX = (hipComplex* const*)x;
    return hipblasCtpsvBatched_64(handle, uplo, transA, diag, m, hipAP, hipX, incx, batchCount);
}

hipblasStatus_t hipblasZtpsvBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m,
    const hipblasDoubleComplex* const AP[],
    hipblasDoubleComplex* const x[], int64_t incx,
    int64_t batchCount)
{
    const hipDoubleComplex* const* hipAP = (const hipDoubleComplex* const*)AP;
    hipDoubleComplex* const* hipX = (hipDoubleComplex* const*)x;
    return hipblasZtpsvBatched_64(handle, uplo, transA, diag, m, hipAP, hipX, incx, batchCount);
}

// tpsv_strided_batched
// Casts the complex arguments to hipComplex pointers and forwards everything to
// hipblasCtpsvStridedBatched.
hipblasStatus_t hipblasCtpsvStridedBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m,
    const hipblasComplex* AP, hipblasStride strideAP,
    hipblasComplex* x, int incx, hipblasStride stridex,
    int batchCount)
{
    const hipComplex* hipAP = (const hipComplex*)AP;
    hipComplex* hipX = (hipComplex*)x;
    return hipblasCtpsvStridedBatched(
        handle, uplo, transA, diag, m, hipAP, strideAP, hipX, incx, stridex, batchCount);
}

hipblasStatus_t hipblasZtpsvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
                                               hipblasOperation_t transA, hipblasDiagType_t diag,
                                               int m, const hipblasDoubleComplex* AP,
                                               hipblasStride strideAP, hipblasDoubleComplex* x,
                                               int incx, hipblasStride stridex, int batchCount)
{
    return
hipblasZtpsvStridedBatched(handle, uplo, transA, diag, m, (const hipDoubleComplex*)AP, strideAP, (hipDoubleComplex*)x, incx, stridex, batchCount); } // tpsv_strided_batched_64 hipblasStatus_t hipblasCtpsvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasComplex* AP, hipblasStride strideAP, hipblasComplex* x, int64_t incx, hipblasStride stridex, int64_t batchCount) { return hipblasCtpsvStridedBatched_64(handle, uplo, transA, diag, m, (const hipComplex*)AP, strideAP, (hipComplex*)x, incx, stridex, batchCount); } hipblasStatus_t hipblasZtpsvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasDoubleComplex* AP, hipblasStride strideAP, hipblasDoubleComplex* x, int64_t incx, hipblasStride stridex, int64_t batchCount) { return hipblasZtpsvStridedBatched_64(handle, uplo, transA, diag, m, (const hipDoubleComplex*)AP, strideAP, (hipDoubleComplex*)x, incx, stridex, batchCount); } // trmv hipblasStatus_t hipblasCtrmvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasComplex* x, int incx) { return hipblasCtrmv( handle, uplo, transA, diag, m, (const hipComplex*)A, lda, (hipComplex*)x, incx); } hipblasStatus_t hipblasZtrmvCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* x, int incx) { return hipblasZtrmv( handle, uplo, transA, diag, m, (const hipDoubleComplex*)A, lda, (hipDoubleComplex*)x, incx); } // trmv_64 hipblasStatus_t hipblasCtrmvCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasComplex* A, int64_t lda, hipblasComplex* x, int64_t incx) { return hipblasCtrmv_64( handle, uplo, transA, diag, 
m, (const hipComplex*)A, lda, (hipComplex*)x, incx);
}

// Casts the double-complex arguments to hipDoubleComplex pointers and forwards
// everything to hipblasZtrmv_64.
hipblasStatus_t hipblasZtrmvCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m,
    const hipblasDoubleComplex* A, int64_t lda,
    hipblasDoubleComplex* x, int64_t incx)
{
    const hipDoubleComplex* hipA = (const hipDoubleComplex*)A;
    hipDoubleComplex* hipX = (hipDoubleComplex*)x;
    return hipblasZtrmv_64(handle, uplo, transA, diag, m, hipA, lda, hipX, incx);
}

// trmv_batched
// Cast the complex pointer arrays to the HIP complex pointer types and forward
// everything to hipblas{C,Z}trmvBatched.
hipblasStatus_t hipblasCtrmvBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m,
    const hipblasComplex* const A[], int lda,
    hipblasComplex* const x[], int incx,
    int batch_count)
{
    const hipComplex* const* hipA = (const hipComplex* const*)A;
    hipComplex* const* hipX = (hipComplex* const*)x;
    return hipblasCtrmvBatched(handle, uplo, transA, diag, m, hipA, lda, hipX, incx, batch_count);
}

hipblasStatus_t hipblasZtrmvBatchedCast(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int m,
    const hipblasDoubleComplex* const A[], int lda,
    hipblasDoubleComplex* const x[], int incx,
    int batch_count)
{
    const hipDoubleComplex* const* hipA = (const hipDoubleComplex* const*)A;
    hipDoubleComplex* const* hipX = (hipDoubleComplex* const*)x;
    return hipblasZtrmvBatched(handle, uplo, transA, diag, m, hipA, lda, hipX, incx, batch_count);
}

// trmv_batched_64
// Casts the complex pointer arrays to hipComplex pointer arrays and forwards
// everything to hipblasCtrmvBatched_64.
hipblasStatus_t hipblasCtrmvBatchedCast_64(
    hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m,
    const hipblasComplex* const A[], int64_t lda,
    hipblasComplex* const x[], int64_t incx,
    int64_t batch_count)
{
    const hipComplex* const* hipA = (const hipComplex* const*)A;
    hipComplex* const* hipX = (hipComplex* const*)x;
    return hipblasCtrmvBatched_64(
        handle, uplo, transA, diag, m, hipA, lda, hipX, incx, batch_count);
}

hipblasStatus_t hipblasZtrmvBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
                                           hipblasOperation_t transA, hipblasDiagType_t diag,
                                           int64_t m, const hipblasDoubleComplex* const A[],
                                           int64_t lda, hipblasDoubleComplex* const x[],
                                           int64_t incx, int64_t batch_count)
{
    return hipblasZtrmvBatched_64(handle, uplo, transA, diag, m,
                                  (const hipDoubleComplex* const*)A,
lda, (hipDoubleComplex* const*)x, incx, batch_count); } // trmv_strided_batched hipblasStatus_t hipblasCtrmvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex* A, int lda, hipblasStride stride_a, hipblasComplex* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasCtrmvStridedBatched(handle, uplo, transA, diag, m, (const hipComplex*)A, lda, stride_a, (hipComplex*)x, incx, stride_x, batch_count); } hipblasStatus_t hipblasZtrmvStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex* A, int lda, hipblasStride stride_a, hipblasDoubleComplex* x, int incx, hipblasStride stride_x, int batch_count) { return hipblasZtrmvStridedBatched(handle, uplo, transA, diag, m, (const hipDoubleComplex*)A, lda, stride_a, (hipDoubleComplex*)x, incx, stride_x, batch_count); } // trmv_strided_batched_64 hipblasStatus_t hipblasCtrmvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasComplex* A, int64_t lda, hipblasStride stride_a, hipblasComplex* x, int64_t incx, hipblasStride stride_x, int64_t batch_count) { return hipblasCtrmvStridedBatched_64(handle, uplo, transA, diag, m, (const hipComplex*)A, lda, stride_a, (hipComplex*)x, incx, stride_x, batch_count); } hipblasStatus_t hipblasZtrmvStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, const hipblasDoubleComplex* A, int64_t lda, hipblasStride stride_a, hipblasDoubleComplex* x, int64_t incx, hipblasStride stride_x, int64_t batch_count) { return hipblasZtrmvStridedBatched_64(handle, uplo, transA, diag, m, (const hipDoubleComplex*)A, lda, stride_a, (hipDoubleComplex*)x, incx, stride_x, batch_count); } /* * 
===========================================================================
 *    level 3 BLAS
 * ===========================================================================
 */

// trtri
// Casts A and invA to hipComplex pointers and forwards every argument, in order, to
// hipblasCtrtri; returns its status.
hipblasStatus_t hipblasCtrtriCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasDiagType_t diag, int n, const hipblasComplex* A, int lda, hipblasComplex* invA,
    int ldinvA)
{
    return hipblasCtrtri(
        handle, uplo, diag, n, (const hipComplex*)A, lda, (hipComplex*)invA, ldinvA);
}

// Double-precision counterpart: casts A and invA to hipDoubleComplex pointers and forwards to
// hipblasZtrtri; returns its status.
hipblasStatus_t hipblasZtrtriCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasDiagType_t diag, int n, const hipblasDoubleComplex* A, int lda,
    hipblasDoubleComplex* invA, int ldinvA)
{
    return hipblasZtrtri(
        handle, uplo, diag, n, (const hipDoubleComplex*)A, lda, (hipDoubleComplex*)invA, ldinvA);
}

// trtri_batched
// Casts the pointer array A to `const hipComplex* const*` and invA to `hipComplex**` (invA is
// declared without the `const` on the array elements that A carries), then forwards to
// hipblasCtrtriBatched; returns its status.
hipblasStatus_t hipblasCtrtriBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasDiagType_t diag, int n, const hipblasComplex* const A[], int lda,
    hipblasComplex* invA[], int ldinvA, int batch_count)
{
    return hipblasCtrtriBatched(handle, uplo, diag, n, (const hipComplex* const*)A, lda,
        (hipComplex**)invA, ldinvA, batch_count);
}

// Double-precision counterpart: casts A to `const hipDoubleComplex* const*` and invA to
// `hipDoubleComplex**`, then forwards to hipblasZtrtriBatched; returns its status.
hipblasStatus_t hipblasZtrtriBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasDiagType_t diag, int n, const hipblasDoubleComplex* const A[], int lda,
    hipblasDoubleComplex* invA[], int ldinvA, int batch_count)
{
    return hipblasZtrtriBatched(handle, uplo, diag, n, (const hipDoubleComplex* const*)A, lda,
        (hipDoubleComplex**)invA, ldinvA, batch_count);
}

// trtri_strided_batched
// Casts A and invA to hipComplex pointers and forwards to hipblasCtrtriStridedBatched; strides
// and all other arguments pass through unchanged. Returns its status.
hipblasStatus_t hipblasCtrtriStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasDiagType_t diag, int n, const hipblasComplex* A, int lda, hipblasStride stride_A,
    hipblasComplex* invA, int ldinvA, hipblasStride stride_invA, int batch_count)
{
    return hipblasCtrtriStridedBatched(handle, uplo, diag, n, (const hipComplex*)A, lda, stride_A,
        (hipComplex*)invA, ldinvA, stride_invA, batch_count);
}

// Double-precision counterpart: casts A and invA to hipDoubleComplex pointers and forwards to
// hipblasZtrtriStridedBatched; returns its status.
hipblasStatus_t
hipblasZtrtriStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasDiagType_t diag, int n, const hipblasDoubleComplex* A, int lda,
    hipblasStride stride_A, hipblasDoubleComplex* invA, int ldinvA, hipblasStride stride_invA,
    int batch_count)
{
    return hipblasZtrtriStridedBatched(handle, uplo, diag, n, (const hipDoubleComplex*)A, lda,
        stride_A, (hipDoubleComplex*)invA, ldinvA, stride_invA, batch_count);
}

// dgmm
// Casts A, x and C to hipComplex pointers and forwards every argument, in order, to
// hipblasCdgmm; returns its status.
hipblasStatus_t hipblasCdgmmCast(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n,
    const hipblasComplex* A, int lda, const hipblasComplex* x, int incx, hipblasComplex* C,
    int ldc)
{
    return hipblasCdgmm(handle, side, m, n, (const hipComplex*)A, lda, (const hipComplex*)x, incx,
        (hipComplex*)C, ldc);
}

// Double-precision counterpart: casts A, x and C to hipDoubleComplex pointers and forwards to
// hipblasZdgmm; returns its status.
hipblasStatus_t hipblasZdgmmCast(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n,
    const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* x, int incx,
    hipblasDoubleComplex* C, int ldc)
{
    return hipblasZdgmm(handle, side, m, n, (const hipDoubleComplex*)A, lda,
        (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)C, ldc);
}

// dgmm_batched
// Casts the pointer arrays A, x and C to arrays of hipComplex pointers and forwards to
// hipblasCdgmmBatched; returns its status.
hipblasStatus_t hipblasCdgmmBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, int m,
    int n, const hipblasComplex* const A[], int lda, const hipblasComplex* const x[], int incx,
    hipblasComplex* const C[], int ldc, int batch_count)
{
    return hipblasCdgmmBatched(handle, side, m, n, (const hipComplex* const*)A, lda,
        (const hipComplex* const*)x, incx, (hipComplex* const*)C, ldc, batch_count);
}

// Double-precision counterpart: casts A, x and C to arrays of hipDoubleComplex pointers and
// forwards to hipblasZdgmmBatched; returns its status.
hipblasStatus_t hipblasZdgmmBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, int m,
    int n, const hipblasDoubleComplex* const A[], int lda,
    const hipblasDoubleComplex* const x[], int incx, hipblasDoubleComplex* const C[], int ldc,
    int batch_count)
{
    return hipblasZdgmmBatched(handle, side, m, n, (const hipDoubleComplex* const*)A, lda,
        (const hipDoubleComplex* const*)x, incx, (hipDoubleComplex* const*)C, ldc, batch_count);
}

// dgmm_strided_batched
// Casts A, x and C to hipComplex pointers and forwards to hipblasCdgmmStridedBatched; strides
// and all other arguments pass through unchanged. Returns its status.
hipblasStatus_t
hipblasCdgmmStridedBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, int m, int n,
    const hipblasComplex* A, int lda, hipblasStride stride_A, const hipblasComplex* x, int incx,
    hipblasStride stride_x, hipblasComplex* C, int ldc, hipblasStride stride_C, int batch_count)
{
    return hipblasCdgmmStridedBatched(handle, side, m, n, (const hipComplex*)A, lda, stride_A,
        (const hipComplex*)x, incx, stride_x, (hipComplex*)C, ldc, stride_C, batch_count);
}

// Double-precision counterpart: casts A, x and C to hipDoubleComplex pointers and forwards to
// hipblasZdgmmStridedBatched; returns its status.
hipblasStatus_t hipblasZdgmmStridedBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side,
    int m, int n, const hipblasDoubleComplex* A, int lda, hipblasStride stride_A,
    const hipblasDoubleComplex* x, int incx, hipblasStride stride_x, hipblasDoubleComplex* C,
    int ldc, hipblasStride stride_C, int batch_count)
{
    return hipblasZdgmmStridedBatched(handle, side, m, n, (const hipDoubleComplex*)A, lda,
        stride_A, (const hipDoubleComplex*)x, incx, stride_x, (hipDoubleComplex*)C, ldc, stride_C,
        batch_count);
}

// dgmm_64
// int64_t-index variant: casts A, x and C to hipComplex pointers and forwards to
// hipblasCdgmm_64; returns its status.
hipblasStatus_t hipblasCdgmmCast_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m,
    int64_t n, const hipblasComplex* A, int64_t lda, const hipblasComplex* x, int64_t incx,
    hipblasComplex* C, int64_t ldc)
{
    return hipblasCdgmm_64(handle, side, m, n, (const hipComplex*)A, lda, (const hipComplex*)x,
        incx, (hipComplex*)C, ldc);
}

// int64_t-index, double-precision variant: casts A, x and C to hipDoubleComplex pointers and
// forwards to hipblasZdgmm_64; returns its status.
hipblasStatus_t hipblasZdgmmCast_64(hipblasHandle_t handle, hipblasSideMode_t side, int64_t m,
    int64_t n, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* x,
    int64_t incx, hipblasDoubleComplex* C, int64_t ldc)
{
    return hipblasZdgmm_64(handle, side, m, n, (const hipDoubleComplex*)A, lda,
        (const hipDoubleComplex*)x, incx, (hipDoubleComplex*)C, ldc);
}

// dgmm_batched_64
// int64_t-index variant: casts the pointer arrays A, x and C to arrays of hipComplex pointers
// and forwards to hipblasCdgmmBatched_64; returns its status.
hipblasStatus_t hipblasCdgmmBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side,
    int64_t m, int64_t n, const hipblasComplex* const A[], int64_t lda,
    const hipblasComplex* const x[], int64_t incx, hipblasComplex* const C[], int64_t ldc,
    int64_t batch_count)
{
    return
hipblasCdgmmBatched_64(handle, side, m, n, (const hipComplex* const*)A, lda,
        (const hipComplex* const*)x, incx, (hipComplex* const*)C, ldc, batch_count);
}

// int64_t-index, double-precision variant: casts the pointer arrays A, x and C to arrays of
// hipDoubleComplex pointers and forwards to hipblasZdgmmBatched_64; returns its status.
hipblasStatus_t hipblasZdgmmBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side,
    int64_t m, int64_t n, const hipblasDoubleComplex* const A[], int64_t lda,
    const hipblasDoubleComplex* const x[], int64_t incx, hipblasDoubleComplex* const C[],
    int64_t ldc, int64_t batch_count)
{
    return hipblasZdgmmBatched_64(handle, side, m, n, (const hipDoubleComplex* const*)A, lda,
        (const hipDoubleComplex* const*)x, incx, (hipDoubleComplex* const*)C, ldc, batch_count);
}

// dgmm_strided_batched_64
// int64_t-index variant: casts A, x and C to hipComplex pointers and forwards to
// hipblasCdgmmStridedBatched_64; strides pass through unchanged. Returns its status.
hipblasStatus_t hipblasCdgmmStridedBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side,
    int64_t m, int64_t n, const hipblasComplex* A, int64_t lda, hipblasStride stride_A,
    const hipblasComplex* x, int64_t incx, hipblasStride stride_x, hipblasComplex* C,
    int64_t ldc, hipblasStride stride_C, int64_t batch_count)
{
    return hipblasCdgmmStridedBatched_64(handle, side, m, n, (const hipComplex*)A, lda, stride_A,
        (const hipComplex*)x, incx, stride_x, (hipComplex*)C, ldc, stride_C, batch_count);
}

// int64_t-index, double-precision variant: casts A, x and C to hipDoubleComplex pointers and
// forwards to hipblasZdgmmStridedBatched_64; returns its status.
hipblasStatus_t hipblasZdgmmStridedBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side,
    int64_t m, int64_t n, const hipblasDoubleComplex* A, int64_t lda, hipblasStride stride_A,
    const hipblasDoubleComplex* x, int64_t incx, hipblasStride stride_x,
    hipblasDoubleComplex* C, int64_t ldc, hipblasStride stride_C, int64_t batch_count)
{
    return hipblasZdgmmStridedBatched_64(handle, side, m, n, (const hipDoubleComplex*)A, lda,
        stride_A, (const hipDoubleComplex*)x, incx, stride_x, (hipDoubleComplex*)C, ldc, stride_C,
        batch_count);
}

// gemm
// Casts alpha, A, B, beta and C to hipComplex pointers and forwards every argument, in order,
// to hipblasCgemm; returns its status.
hipblasStatus_t hipblasCgemmCast(hipblasHandle_t handle, hipblasOperation_t transA,
    hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha,
    const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb,
    const hipblasComplex* beta, hipblasComplex* C, int ldc)
{
    return
hipblasCgemm(handle, transA, transB, m, n, k, (const hipComplex*)alpha, (const hipComplex*)A,
        lda, (const hipComplex*)B, ldb, (const hipComplex*)beta, (hipComplex*)C, ldc);
}

// Double-precision counterpart: casts alpha, A, B, beta and C to hipDoubleComplex pointers and
// forwards to hipblasZgemm; returns its status.
hipblasStatus_t hipblasZgemmCast(hipblasHandle_t handle, hipblasOperation_t transA,
    hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb,
    const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc)
{
    return hipblasZgemm(handle, transA, transB, m, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb,
        (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc);
}

// gemm_batched
// Casts the scalars alpha/beta to hipComplex pointers and the pointer arrays A, B and C to
// arrays of hipComplex pointers, then forwards to hipblasCgemmBatched; returns its status.
hipblasStatus_t hipblasCgemmBatchedCast(hipblasHandle_t handle, hipblasOperation_t transA,
    hipblasOperation_t transB, int m, int n, int k, const hipblasComplex* alpha,
    const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb,
    const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batch_count)
{
    return hipblasCgemmBatched(handle, transA, transB, m, n, k, (const hipComplex*)alpha,
        (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb,
        (const hipComplex*)beta, (hipComplex* const*)C, ldc, batch_count);
}

// Double-precision counterpart: casts alpha/beta to hipDoubleComplex pointers and A, B, C to
// arrays of hipDoubleComplex pointers, then forwards to hipblasZgemmBatched; returns its status.
hipblasStatus_t hipblasZgemmBatchedCast(hipblasHandle_t handle, hipblasOperation_t transA,
    hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[],
    int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc,
    int batch_count)
{
    return hipblasZgemmBatched(handle, transA, transB, m, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb,
        (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batch_count);
}

// gemm_strided_batched
// Casts alpha, A, B, beta and C to hipComplex pointers and forwards to
// hipblasCgemmStridedBatched; returns its status.
// NOTE(review): the batch strides bsa/bsb/bsc are declared `int` here, whereas the other
// strided-batched wrappers in this section take hipblasStride - confirm the narrower type is
// intended before relying on strides that do not fit in an int.
hipblasStatus_t hipblasCgemmStridedBatchedCast(hipblasHandle_t handle,
hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k,
    const hipblasComplex* alpha, const hipblasComplex* A, int lda, int bsa,
    const hipblasComplex* B, int ldb, int bsb, const hipblasComplex* beta, hipblasComplex* C,
    int ldc, int bsc, int batch_count)
{
    return hipblasCgemmStridedBatched(handle, transA, transB, m, n, k, (const hipComplex*)alpha,
        (const hipComplex*)A, lda, bsa, (const hipComplex*)B, ldb, bsb, (const hipComplex*)beta,
        (hipComplex*)C, ldc, bsc, batch_count);
}

// Double-precision counterpart: casts alpha, A, B, beta and C to hipDoubleComplex pointers and
// forwards to hipblasZgemmStridedBatched; returns its status.
// NOTE(review): bsa/bsb/bsc are `int` here while the other strided-batched wrappers in this
// section use hipblasStride - confirm this is intended.
hipblasStatus_t hipblasZgemmStridedBatchedCast(hipblasHandle_t handle, hipblasOperation_t transA,
    hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* A, int lda, int bsa, const hipblasDoubleComplex* B, int ldb,
    int bsb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, int bsc,
    int batch_count)
{
    return hipblasZgemmStridedBatched(handle, transA, transB, m, n, k,
        (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, bsa,
        (const hipDoubleComplex*)B, ldb, bsb, (const hipDoubleComplex*)beta,
        (hipDoubleComplex*)C, ldc, bsc, batch_count);
}

// gemm_64
// int64_t-index variant: casts alpha, A, B, beta and C to hipComplex pointers and forwards to
// hipblasCgemm_64; returns its status.
hipblasStatus_t hipblasCgemmCast_64(hipblasHandle_t handle, hipblasOperation_t transA,
    hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasComplex* alpha,
    const hipblasComplex* A, int64_t lda, const hipblasComplex* B, int64_t ldb,
    const hipblasComplex* beta, hipblasComplex* C, int64_t ldc)
{
    return hipblasCgemm_64(handle, transA, transB, m, n, k, (const hipComplex*)alpha,
        (const hipComplex*)A, lda, (const hipComplex*)B, ldb, (const hipComplex*)beta,
        (hipComplex*)C, ldc);
}

// int64_t-index, double-precision variant: casts alpha, A, B, beta and C to hipDoubleComplex
// pointers and forwards to hipblasZgemm_64; returns its status.
hipblasStatus_t hipblasZgemmCast_64(hipblasHandle_t handle, hipblasOperation_t transA,
    hipblasOperation_t transB, int64_t m, int64_t n, int64_t k,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda,
    const hipblasDoubleComplex* B, int64_t ldb, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* C, int64_t ldc)
{
    return
hipblasZgemm_64(handle, transA, transB, m, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb,
        (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc);
}

// gemm_batched_64
// int64_t-index variant: casts alpha/beta to hipComplex pointers and the pointer arrays A, B, C
// to arrays of hipComplex pointers, then forwards to hipblasCgemmBatched_64; returns its status.
hipblasStatus_t hipblasCgemmBatchedCast_64(hipblasHandle_t handle, hipblasOperation_t transA,
    hipblasOperation_t transB, int64_t m, int64_t n, int64_t k, const hipblasComplex* alpha,
    const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const B[], int64_t ldb,
    const hipblasComplex* beta, hipblasComplex* const C[], int64_t ldc, int64_t batch_count)
{
    return hipblasCgemmBatched_64(handle, transA, transB, m, n, k, (const hipComplex*)alpha,
        (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb,
        (const hipComplex*)beta, (hipComplex* const*)C, ldc, batch_count);
}

// int64_t-index, double-precision variant: casts alpha/beta to hipDoubleComplex pointers and
// A, B, C to arrays of hipDoubleComplex pointers, then forwards to hipblasZgemmBatched_64;
// returns its status.
hipblasStatus_t hipblasZgemmBatchedCast_64(hipblasHandle_t handle, hipblasOperation_t transA,
    hipblasOperation_t transB, int64_t m, int64_t n, int64_t k,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda,
    const hipblasDoubleComplex* const B[], int64_t ldb, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* const C[], int64_t ldc, int64_t batch_count)
{
    return hipblasZgemmBatched_64(handle, transA, transB, m, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb,
        (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batch_count);
}

// gemm_strided_batched_64
// int64_t-index variant: casts alpha, A, B, beta and C to hipComplex pointers and forwards to
// hipblasCgemmStridedBatched_64; the strides bsa/bsb/bsc are int64_t here and pass through
// unchanged. Returns its status.
hipblasStatus_t hipblasCgemmStridedBatchedCast_64(hipblasHandle_t handle,
    hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k,
    const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, int64_t bsa,
    const hipblasComplex* B, int64_t ldb, int64_t bsb, const hipblasComplex* beta,
    hipblasComplex* C, int64_t ldc, int64_t bsc, int64_t batch_count)
{
    return hipblasCgemmStridedBatched_64(handle, transA, transB, m, n, k,
        (const hipComplex*)alpha, (const
hipComplex*)A, lda, bsa, (const hipComplex*)B, ldb, bsb, (const hipComplex*)beta,
        (hipComplex*)C, ldc, bsc, batch_count);
}

// int64_t-index, double-precision variant: casts alpha, A, B, beta and C to hipDoubleComplex
// pointers and forwards to hipblasZgemmStridedBatched_64; returns its status.
hipblasStatus_t hipblasZgemmStridedBatchedCast_64(hipblasHandle_t handle,
    hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, int64_t k,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, int64_t bsa,
    const hipblasDoubleComplex* B, int64_t ldb, int64_t bsb, const hipblasDoubleComplex* beta,
    hipblasDoubleComplex* C, int64_t ldc, int64_t bsc, int64_t batch_count)
{
    return hipblasZgemmStridedBatched_64(handle, transA, transB, m, n, k,
        (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, bsa,
        (const hipDoubleComplex*)B, ldb, bsb, (const hipDoubleComplex*)beta,
        (hipDoubleComplex*)C, ldc, bsc, batch_count);
}

// herk
// Casts A and C to hipComplex pointers and forwards to hipblasCherk; alpha and beta are real
// (float) scalars and are passed through without a cast. Returns its status.
hipblasStatus_t hipblasCherkCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* A,
    int lda, const float* beta, hipblasComplex* C, int ldc)
{
    return hipblasCherk(
        handle, uplo, transA, n, k, alpha, (const hipComplex*)A, lda, beta, (hipComplex*)C, ldc);
}

// Double-precision counterpart: casts A and C to hipDoubleComplex pointers and forwards to
// hipblasZherk; alpha and beta (double) pass through uncast. Returns its status.
hipblasStatus_t hipblasZherkCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const double* alpha,
    const hipblasDoubleComplex* A, int lda, const double* beta, hipblasDoubleComplex* C,
    int ldc)
{
    return hipblasZherk(handle, uplo, transA, n, k, alpha, (const hipDoubleComplex*)A, lda, beta,
        (hipDoubleComplex*)C, ldc);
}

// herk_batched
// Casts the pointer arrays A and C to arrays of hipComplex pointers and forwards to
// hipblasCherkBatched; alpha and beta (float) pass through uncast. Returns its status.
hipblasStatus_t hipblasCherkBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const float* alpha,
    const hipblasComplex* const A[], int lda, const float* beta, hipblasComplex* const C[],
    int ldc, int batchCount)
{
    return hipblasCherkBatched(handle, uplo, transA, n, k, alpha, (const hipComplex* const*)A,
        lda, beta, (hipComplex* const*)C, ldc, batchCount);
}

// Double-precision counterpart: casts A and C to arrays of hipDoubleComplex pointers and
// forwards to hipblasZherkBatched; alpha and beta (double) pass through uncast.
hipblasStatus_t hipblasZherkBatchedCast(hipblasHandle_t handle,
hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double* alpha,
    const hipblasDoubleComplex* const A[], int lda, const double* beta,
    hipblasDoubleComplex* const C[], int ldc, int batchCount)
{
    return hipblasZherkBatched(handle, uplo, transA, n, k, alpha,
        (const hipDoubleComplex* const*)A, lda, beta, (hipDoubleComplex* const*)C, ldc,
        batchCount);
}

// herk_strided_batched
// Casts A and C to hipComplex pointers and forwards to hipblasCherkStridedBatched; alpha/beta
// (float) and the strides pass through unchanged. Returns its status.
hipblasStatus_t hipblasCherkStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const float* alpha, const hipblasComplex* A,
    int lda, hipblasStride strideA, const float* beta, hipblasComplex* C, int ldc,
    hipblasStride strideC, int batchCount)
{
    return hipblasCherkStridedBatched(handle, uplo, transA, n, k, alpha, (const hipComplex*)A,
        lda, strideA, beta, (hipComplex*)C, ldc, strideC, batchCount);
}

// Double-precision counterpart: casts A and C to hipDoubleComplex pointers and forwards to
// hipblasZherkStridedBatched; alpha/beta (double) pass through uncast. Returns its status.
hipblasStatus_t hipblasZherkStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const double* alpha,
    const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const double* beta,
    hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount)
{
    return hipblasZherkStridedBatched(handle, uplo, transA, n, k, alpha,
        (const hipDoubleComplex*)A, lda, strideA, beta, (hipDoubleComplex*)C, ldc, strideC,
        batchCount);
}

// herk_64
// int64_t-index variant: casts A and C to hipComplex pointers and forwards to hipblasCherk_64;
// alpha/beta (float) pass through uncast. Returns its status.
hipblasStatus_t hipblasCherkCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const float* alpha,
    const hipblasComplex* A, int64_t lda, const float* beta, hipblasComplex* C, int64_t ldc)
{
    return hipblasCherk_64(
        handle, uplo, transA, n, k, alpha, (const hipComplex*)A, lda, beta, (hipComplex*)C, ldc);
}

// int64_t-index, double-precision variant: casts A and C to hipDoubleComplex pointers and
// forwards to hipblasZherk_64; alpha/beta (double) pass through uncast. Returns its status.
hipblasStatus_t hipblasZherkCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const double* alpha,
    const hipblasDoubleComplex* A, int64_t lda, const double* beta, hipblasDoubleComplex* C,
    int64_t ldc)
{
    return hipblasZherk_64(handle,
uplo, transA, n, k, alpha, (const hipDoubleComplex*)A, lda, beta, (hipDoubleComplex*)C, ldc);
}

// herk_batched_64
// int64_t-index variant: casts the pointer arrays A and C to arrays of hipComplex pointers and
// forwards to hipblasCherkBatched_64; alpha/beta (float) pass through uncast.
hipblasStatus_t hipblasCherkBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const float* alpha,
    const hipblasComplex* const A[], int64_t lda, const float* beta,
    hipblasComplex* const C[], int64_t ldc, int64_t batchCount)
{
    return hipblasCherkBatched_64(handle, uplo, transA, n, k, alpha, (const hipComplex* const*)A,
        lda, beta, (hipComplex* const*)C, ldc, batchCount);
}

// int64_t-index, double-precision variant: casts A and C to arrays of hipDoubleComplex pointers
// and forwards to hipblasZherkBatched_64; alpha/beta (double) pass through uncast.
hipblasStatus_t hipblasZherkBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const double* alpha,
    const hipblasDoubleComplex* const A[], int64_t lda, const double* beta,
    hipblasDoubleComplex* const C[], int64_t ldc, int64_t batchCount)
{
    return hipblasZherkBatched_64(handle, uplo, transA, n, k, alpha,
        (const hipDoubleComplex* const*)A, lda, beta, (hipDoubleComplex* const*)C, ldc,
        batchCount);
}

// herk_strided_batched_64
// int64_t-index variant: casts A and C to hipComplex pointers and forwards to
// hipblasCherkStridedBatched_64; alpha/beta (float) and strides pass through unchanged.
hipblasStatus_t hipblasCherkStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const float* alpha,
    const hipblasComplex* A, int64_t lda, hipblasStride strideA, const float* beta,
    hipblasComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount)
{
    return hipblasCherkStridedBatched_64(handle, uplo, transA, n, k, alpha, (const hipComplex*)A,
        lda, strideA, beta, (hipComplex*)C, ldc, strideC, batchCount);
}

// int64_t-index, double-precision variant: casts A and C to hipDoubleComplex pointers and
// forwards to hipblasZherkStridedBatched_64; alpha/beta (double) pass through uncast.
hipblasStatus_t hipblasZherkStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const double* alpha,
    const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, const double* beta,
    hipblasDoubleComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount)
{
    return hipblasZherkStridedBatched_64(handle, uplo, transA, n, k, alpha,
        (const hipDoubleComplex*)A, lda, strideA, beta,
(hipDoubleComplex*)C, ldc, strideC, batchCount);
}

// her2k
// Casts alpha, A, B and C to hipComplex pointers and forwards to hipblasCher2k; beta is a real
// (float) scalar and is passed through without a cast. Returns its status.
hipblasStatus_t hipblasCher2kCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha,
    const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const float* beta,
    hipblasComplex* C, int ldc)
{
    return hipblasCher2k(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex*)A, lda, (const hipComplex*)B, ldb, beta, (hipComplex*)C, ldc);
}

// Double-precision counterpart: casts alpha, A, B and C to hipDoubleComplex pointers and
// forwards to hipblasZher2k; beta (double) passes through uncast. Returns its status.
hipblasStatus_t hipblasZher2kCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb,
    const double* beta, hipblasDoubleComplex* C, int ldc)
{
    return hipblasZher2k(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, beta,
        (hipDoubleComplex*)C, ldc);
}

// her2k_batched
// Casts alpha to a hipComplex pointer and the pointer arrays A, B, C to arrays of hipComplex
// pointers, then forwards to hipblasCher2kBatched; beta (float) passes through uncast.
hipblasStatus_t hipblasCher2kBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha,
    const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb,
    const float* beta, hipblasComplex* const C[], int ldc, int batchCount)
{
    return hipblasCher2kBatched(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, beta,
        (hipComplex* const*)C, ldc, batchCount);
}

// Double-precision counterpart: casts alpha to a hipDoubleComplex pointer and A, B, C to arrays
// of hipDoubleComplex pointers, then forwards to hipblasZher2kBatched; beta (double) passes
// through uncast.
hipblasStatus_t hipblasZher2kBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[],
    int ldb, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount)
{
    return hipblasZher2kBatched(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex*
const*)B, ldb, beta, (hipDoubleComplex* const*)C, ldc, batchCount);
}

// her2k_strided_batched
// Casts alpha, A, B and C to hipComplex pointers and forwards to hipblasCher2kStridedBatched;
// beta (float) and the strides pass through unchanged. Returns its status.
hipblasStatus_t hipblasCher2kStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha,
    const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb,
    hipblasStride strideB, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC,
    int batchCount)
{
    return hipblasCher2kStridedBatched(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, beta,
        (hipComplex*)C, ldc, strideC, batchCount);
}

// Double-precision counterpart: casts alpha, A, B and C to hipDoubleComplex pointers and
// forwards to hipblasZher2kStridedBatched; beta (double) passes through uncast.
hipblasStatus_t hipblasZher2kStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* A, int lda, hipblasStride strideA,
    const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const double* beta,
    hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount)
{
    return hipblasZher2kStridedBatched(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, beta,
        (hipDoubleComplex*)C, ldc, strideC, batchCount);
}

// her2k_64
// int64_t-index variant: casts alpha, A, B and C to hipComplex pointers and forwards to
// hipblasCher2k_64; beta (float) passes through uncast. Returns its status.
hipblasStatus_t hipblasCher2kCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha,
    const hipblasComplex* A, int64_t lda, const hipblasComplex* B, int64_t ldb,
    const float* beta, hipblasComplex* C, int64_t ldc)
{
    return hipblasCher2k_64(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex*)A, lda, (const hipComplex*)B, ldb, beta, (hipComplex*)C, ldc);
}

// int64_t-index, double-precision variant: casts alpha, A, B and C to hipDoubleComplex pointers
// and forwards to hipblasZher2k_64; beta (double) passes through uncast.
hipblasStatus_t hipblasZher2kCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* A, int64_t lda, const
hipblasDoubleComplex* B, int64_t ldb, const double* beta, hipblasDoubleComplex* C, int64_t ldc)
{
    return hipblasZher2k_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, beta,
        (hipDoubleComplex*)C, ldc);
}

// her2k_batched_64
// int64_t-index variant: casts alpha to a hipComplex pointer and the pointer arrays A, B, C to
// arrays of hipComplex pointers, then forwards to hipblasCher2kBatched_64; beta (float) passes
// through uncast.
hipblasStatus_t hipblasCher2kBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha,
    const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const B[], int64_t ldb,
    const float* beta, hipblasComplex* const C[], int64_t ldc, int64_t batchCount)
{
    return hipblasCher2kBatched_64(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, beta,
        (hipComplex* const*)C, ldc, batchCount);
}

// int64_t-index, double-precision variant: casts alpha to a hipDoubleComplex pointer and
// A, B, C to arrays of hipDoubleComplex pointers, then forwards to hipblasZher2kBatched_64;
// beta (double) passes through uncast.
hipblasStatus_t hipblasZher2kBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* const A[], int64_t lda,
    const hipblasDoubleComplex* const B[], int64_t ldb, const double* beta,
    hipblasDoubleComplex* const C[], int64_t ldc, int64_t batchCount)
{
    return hipblasZher2kBatched_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, beta,
        (hipDoubleComplex* const*)C, ldc, batchCount);
}

// her2k_strided_batched_64
// int64_t-index variant: casts alpha, A, B and C to hipComplex pointers and forwards to
// hipblasCher2kStridedBatched_64; beta (float) and strides pass through unchanged.
hipblasStatus_t hipblasCher2kStridedBatchedCast_64(hipblasHandle_t handle,
    hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k,
    const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA,
    const hipblasComplex* B, int64_t ldb, hipblasStride strideB, const float* beta,
    hipblasComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount)
{
    return hipblasCher2kStridedBatched_64(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex*)A, lda, strideA,
(const hipComplex*)B, ldb, strideB, beta, (hipComplex*)C, ldc, strideC, batchCount);
}

// int64_t-index, double-precision variant: casts alpha, A, B and C to hipDoubleComplex pointers
// and forwards to hipblasZher2kStridedBatched_64; beta (double) passes through uncast.
hipblasStatus_t hipblasZher2kStridedBatchedCast_64(hipblasHandle_t handle,
    hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda,
    hipblasStride strideA, const hipblasDoubleComplex* B, int64_t ldb, hipblasStride strideB,
    const double* beta, hipblasDoubleComplex* C, int64_t ldc, hipblasStride strideC,
    int64_t batchCount)
{
    return hipblasZher2kStridedBatched_64(handle, uplo, transA, n, k,
        (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA,
        (const hipDoubleComplex*)B, ldb, strideB, beta, (hipDoubleComplex*)C, ldc, strideC,
        batchCount);
}

// herkx
// Casts alpha, A, B and C to hipComplex pointers and forwards to hipblasCherkx; beta is a real
// (float) scalar and is passed through without a cast. Returns its status.
hipblasStatus_t hipblasCherkxCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha,
    const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const float* beta,
    hipblasComplex* C, int ldc)
{
    return hipblasCherkx(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex*)A, lda, (const hipComplex*)B, ldb, beta, (hipComplex*)C, ldc);
}

// Double-precision counterpart: casts alpha, A, B and C to hipDoubleComplex pointers and
// forwards to hipblasZherkx; beta (double) passes through uncast. Returns its status.
hipblasStatus_t hipblasZherkxCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb,
    const double* beta, hipblasDoubleComplex* C, int ldc)
{
    return hipblasZherkx(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, beta,
        (hipDoubleComplex*)C, ldc);
}

// herkx_batched
// Casts alpha to a hipComplex pointer and the pointer arrays A, B, C to arrays of hipComplex
// pointers, then forwards to hipblasCherkxBatched; beta (float) passes through uncast.
hipblasStatus_t hipblasCherkxBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha,
    const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb,
    const float* beta, hipblasComplex* const C[], int ldc, int batchCount)
{
    return
hipblasCherkxBatched(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, beta,
        (hipComplex* const*)C, ldc, batchCount);
}

// Double-precision counterpart: casts alpha to a hipDoubleComplex pointer and A, B, C to arrays
// of hipDoubleComplex pointers, then forwards to hipblasZherkxBatched; beta (double) passes
// through uncast.
hipblasStatus_t hipblasZherkxBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[],
    int ldb, const double* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount)
{
    return hipblasZherkxBatched(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, beta,
        (hipDoubleComplex* const*)C, ldc, batchCount);
}

// herkx_strided_batched
// Casts alpha, A, B and C to hipComplex pointers and forwards to hipblasCherkxStridedBatched;
// beta (float) and the strides pass through unchanged. Returns its status.
hipblasStatus_t hipblasCherkxStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha,
    const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb,
    hipblasStride strideB, const float* beta, hipblasComplex* C, int ldc, hipblasStride strideC,
    int batchCount)
{
    return hipblasCherkxStridedBatched(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, beta,
        (hipComplex*)C, ldc, strideC, batchCount);
}

// Double-precision counterpart: casts alpha, A, B and C to hipDoubleComplex pointers and
// forwards to hipblasZherkxStridedBatched; beta (double) passes through uncast.
hipblasStatus_t hipblasZherkxStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* A, int lda, hipblasStride strideA,
    const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const double* beta,
    hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount)
{
    return hipblasZherkxStridedBatched(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, beta,
        (hipDoubleComplex*)C, ldc, strideC, batchCount);
}

//
herkx_64
// int64_t-index variant: casts alpha, A, B and C to hipComplex pointers and forwards to
// hipblasCherkx_64; beta (float) passes through uncast. Returns its status.
hipblasStatus_t hipblasCherkxCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha,
    const hipblasComplex* A, int64_t lda, const hipblasComplex* B, int64_t ldb,
    const float* beta, hipblasComplex* C, int64_t ldc)
{
    return hipblasCherkx_64(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex*)A, lda, (const hipComplex*)B, ldb, beta, (hipComplex*)C, ldc);
}

// int64_t-index, double-precision variant: casts alpha, A, B and C to hipDoubleComplex pointers
// and forwards to hipblasZherkx_64; beta (double) passes through uncast.
hipblasStatus_t hipblasZherkxCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* B, int64_t ldb,
    const double* beta, hipblasDoubleComplex* C, int64_t ldc)
{
    return hipblasZherkx_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha,
        (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, beta,
        (hipDoubleComplex*)C, ldc);
}

// herkx_batched_64
// int64_t-index variant: casts alpha to a hipComplex pointer and the pointer arrays A, B, C to
// arrays of hipComplex pointers, then forwards to hipblasCherkxBatched_64; beta (float) passes
// through uncast.
hipblasStatus_t hipblasCherkxBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha,
    const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const B[], int64_t ldb,
    const float* beta, hipblasComplex* const C[], int64_t ldc, int64_t batchCount)
{
    return hipblasCherkxBatched_64(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, beta,
        (hipComplex* const*)C, ldc, batchCount);
}

// int64_t-index, double-precision variant: casts alpha to a hipDoubleComplex pointer and
// A, B, C to arrays of hipDoubleComplex pointers, then forwards to hipblasZherkxBatched_64;
// beta (double) passes through uncast.
hipblasStatus_t hipblasZherkxBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo,
    hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha,
    const hipblasDoubleComplex* const A[], int64_t lda,
    const hipblasDoubleComplex* const B[], int64_t ldb, const double* beta,
    hipblasDoubleComplex* const C[], int64_t ldc, int64_t batchCount)
{
    return hipblasZherkxBatched_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha,
        (const
hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, beta,
        (hipDoubleComplex* const*)C, ldc, batchCount);
}

// herkx_strided_batched_64
// int64_t-index variant: casts alpha, A, B and C to hipComplex pointers and forwards to
// hipblasCherkxStridedBatched_64; beta (float) and strides pass through unchanged.
hipblasStatus_t hipblasCherkxStridedBatchedCast_64(hipblasHandle_t handle,
    hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k,
    const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA,
    const hipblasComplex* B, int64_t ldb, hipblasStride strideB, const float* beta,
    hipblasComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount)
{
    return hipblasCherkxStridedBatched_64(handle, uplo, transA, n, k, (const hipComplex*)alpha,
        (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, beta,
        (hipComplex*)C, ldc, strideC, batchCount);
}

// int64_t-index, double-precision variant: casts alpha, A, B and C to hipDoubleComplex pointers
// and forwards to hipblasZherkxStridedBatched_64; beta (double) passes through uncast.
hipblasStatus_t hipblasZherkxStridedBatchedCast_64(hipblasHandle_t handle,
    hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k,
    const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda,
    hipblasStride strideA, const hipblasDoubleComplex* B, int64_t ldb, hipblasStride strideB,
    const double* beta, hipblasDoubleComplex* C, int64_t ldc, hipblasStride strideC,
    int64_t batchCount)
{
    return hipblasZherkxStridedBatched_64(handle, uplo, transA, n, k,
        (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA,
        (const hipDoubleComplex*)B, ldb, strideB, beta, (hipDoubleComplex*)C, ldc, strideC,
        batchCount);
}

// symm
// Casts alpha, A, B, beta and C to hipComplex pointers and forwards every argument, in order,
// to hipblasCsymm; returns its status.
hipblasStatus_t hipblasCsymmCast(hipblasHandle_t handle, hipblasSideMode_t side,
    hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A,
    int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C,
    int ldc)
{
    return hipblasCsymm(handle, side, uplo, m, n, (const hipComplex*)alpha, (const hipComplex*)A,
        lda, (const hipComplex*)B, ldb, (const hipComplex*)beta, (hipComplex*)C, ldc);
}

// Double-precision counterpart: casts alpha, A, B, beta and C to hipDoubleComplex pointers and
// forwards to hipblasZsymm; returns its status.
hipblasStatus_t hipblasZsymmCast(hipblasHandle_t handle, hipblasSideMode_t side,
    hipblasFillMode_t uplo, int m, int
n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsymm(handle, side, uplo, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc); } // symm_batched hipblasStatus_t hipblasCsymmBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsymmBatched(handle, side, uplo, m, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, (const hipComplex*)beta, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZsymmBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsymmBatched(handle, side, uplo, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batchCount); } // symm_strided_batched hipblasStatus_t hipblasCsymmStridedBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsymmStridedBatched(handle, side, uplo, m, n, 
(const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, (const hipComplex*)beta, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZsymmStridedBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsymmStridedBatched(handle, side, uplo, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // symm_64 hipblasStatus_t hipblasCsymmCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* B, int64_t ldb, const hipblasComplex* beta, hipblasComplex* C, int64_t ldc) { return hipblasCsymm_64(handle, side, uplo, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)B, ldb, (const hipComplex*)beta, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZsymmCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* B, int64_t ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int64_t ldc) { return hipblasZsymm_64(handle, side, uplo, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc); } // symm_batched_64 hipblasStatus_t hipblasCsymmBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, 
int64_t n, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const B[], int64_t ldb, const hipblasComplex* beta, hipblasComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasCsymmBatched_64(handle, side, uplo, m, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, (const hipComplex*)beta, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZsymmBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, const hipblasDoubleComplex* const B[], int64_t ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasZsymmBatched_64(handle, side, uplo, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batchCount); } // symm_strided_batched_64 hipblasStatus_t hipblasCsymmStridedBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA, const hipblasComplex* B, int64_t ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasCsymmStridedBatched_64(handle, side, uplo, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, (const hipComplex*)beta, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZsymmStridedBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, const hipblasDoubleComplex* 
B, int64_t ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasZsymmStridedBatched_64(handle, side, uplo, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // syrk hipblasStatus_t hipblasCsyrkCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsyrk(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)beta, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZsyrkCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsyrk(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc); } // syrk_batched hipblasStatus_t hipblasCsyrkBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsyrkBatched(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex*)beta, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZsyrkBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* beta, 
hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsyrkBatched(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batchCount); } // syrk_strided_batched hipblasStatus_t hipblasCsyrkStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsyrkStridedBatched(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)beta, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZsyrkStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsyrkStridedBatched(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // syrk_64 hipblasStatus_t hipblasCsyrkCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* beta, hipblasComplex* C, int64_t ldc) { return hipblasCsyrk_64(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)beta, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZsyrkCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t 
lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int64_t ldc) { return hipblasZsyrk_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc); } // syrk_batched_64 hipblasStatus_t hipblasCsyrkBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, const hipblasComplex* beta, hipblasComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasCsyrkBatched_64(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex*)beta, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZsyrkBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasZsyrkBatched_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batchCount); } // syrk_strided_batched_64 hipblasStatus_t hipblasCsyrkStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA, const hipblasComplex* beta, hipblasComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasCsyrkStridedBatched_64(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)beta, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZsyrkStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, 
int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasZsyrkStridedBatched_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // syr2k hipblasStatus_t hipblasCsyr2kCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsyr2k(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)B, ldb, (const hipComplex*)beta, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZsyr2kCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsyr2k(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc); } // syr2k_batched hipblasStatus_t hipblasCsyr2kBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsyr2kBatched(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, (const hipComplex*)beta, (hipComplex* const*)C, ldc, 
batchCount); } hipblasStatus_t hipblasZsyr2kBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsyr2kBatched(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batchCount); } // syr2k_strided_batched hipblasStatus_t hipblasCsyr2kStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsyr2kStridedBatched(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, (const hipComplex*)beta, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZsyr2kStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsyr2kStridedBatched(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // syr2k_64 hipblasStatus_t hipblasCsyr2kCast_64(hipblasHandle_t handle, 
hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* B, int64_t ldb, const hipblasComplex* beta, hipblasComplex* C, int64_t ldc) { return hipblasCsyr2k_64(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)B, ldb, (const hipComplex*)beta, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZsyr2kCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* B, int64_t ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int64_t ldc) { return hipblasZsyr2k_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc); } // syr2k_batched_64 hipblasStatus_t hipblasCsyr2kBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const B[], int64_t ldb, const hipblasComplex* beta, hipblasComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasCsyr2kBatched_64(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, (const hipComplex*)beta, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZsyr2kBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, const hipblasDoubleComplex* const B[], int64_t ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasZsyr2kBatched_64(handle, uplo, transA, n, k, 
(const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batchCount); } // syr2k_strided_batched_64 hipblasStatus_t hipblasCsyr2kStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA, const hipblasComplex* B, int64_t ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasCsyr2kStridedBatched_64(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, (const hipComplex*)beta, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZsyr2kStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, const hipblasDoubleComplex* B, int64_t ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasZsyr2kStridedBatched_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // syrkx hipblasStatus_t hipblasCsyrkxCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasCsyrkx(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)B, ldb, (const hipComplex*)beta, 
(hipComplex*)C, ldc); } hipblasStatus_t hipblasZsyrkxCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZsyrkx(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc); } // syrkx_batched hipblasStatus_t hipblasCsyrkxBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCsyrkxBatched(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, (const hipComplex*)beta, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZsyrkxBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZsyrkxBatched(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batchCount); } // syrkx_strided_batched hipblasStatus_t hipblasCsyrkxStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride 
strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCsyrkxStridedBatched(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, (const hipComplex*)beta, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZsyrkxStridedBatchedCast(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZsyrkxStridedBatched(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // syrkx_64 hipblasStatus_t hipblasCsyrkxCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* B, int64_t ldb, const hipblasComplex* beta, hipblasComplex* C, int64_t ldc) { return hipblasCsyrkx_64(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)B, ldb, (const hipComplex*)beta, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZsyrkxCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* B, int64_t ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int64_t ldc) { return hipblasZsyrkx_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, (const 
hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc); } // syrkx_batched_64 hipblasStatus_t hipblasCsyrkxBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const B[], int64_t ldb, const hipblasComplex* beta, hipblasComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasCsyrkxBatched_64(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, (const hipComplex*)beta, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZsyrkxBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, const hipblasDoubleComplex* const B[], int64_t ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasZsyrkxBatched_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batchCount); } // syrkx_strided_batched_64 hipblasStatus_t hipblasCsyrkxStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA, const hipblasComplex* B, int64_t ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasCsyrkxStridedBatched_64(handle, uplo, transA, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, (const hipComplex*)beta, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t 
hipblasZsyrkxStridedBatchedCast_64(hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, const hipblasDoubleComplex* B, int64_t ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasZsyrkxStridedBatched_64(handle, uplo, transA, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // hemm hipblasStatus_t hipblasChemmCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, const hipblasComplex* beta, hipblasComplex* C, int ldc) { return hipblasChemm(handle, side, uplo, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)B, ldb, (const hipComplex*)beta, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZhemmCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc) { return hipblasZhemm(handle, side, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc); } // hemm_batched hipblasStatus_t hipblasChemmBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, const hipblasComplex* beta, hipblasComplex* const C[], int ldc, int batchCount) { return 
hipblasChemmBatched(handle, side, uplo, n, k, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, (const hipComplex*)beta, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZhemmBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZhemmBatched(handle, side, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batchCount); } // hemm_strided_batched hipblasStatus_t hipblasChemmStridedBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasChemmStridedBatched(handle, side, uplo, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, (const hipComplex*)beta, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZhemmStridedBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZhemmStridedBatched(handle, side, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, 
strideB, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // hemm_64 hipblasStatus_t hipblasChemmCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* B, int64_t ldb, const hipblasComplex* beta, hipblasComplex* C, int64_t ldc) { return hipblasChemm_64(handle, side, uplo, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)B, ldb, (const hipComplex*)beta, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZhemmCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* B, int64_t ldb, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int64_t ldc) { return hipblasZhemm_64(handle, side, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc); } // hemm_batched_64 hipblasStatus_t hipblasChemmBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const B[], int64_t ldb, const hipblasComplex* beta, hipblasComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasChemmBatched_64(handle, side, uplo, n, k, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, (const hipComplex*)beta, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZhemmBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, const hipblasDoubleComplex* const B[], int64_t ldb, const 
hipblasDoubleComplex* beta, hipblasDoubleComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasZhemmBatched_64(handle, side, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, (const hipDoubleComplex*)beta, (hipDoubleComplex* const*)C, ldc, batchCount); } // hemm_strided_batched_64 hipblasStatus_t hipblasChemmStridedBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA, const hipblasComplex* B, int64_t ldb, hipblasStride strideB, const hipblasComplex* beta, hipblasComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasChemmStridedBatched_64(handle, side, uplo, n, k, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, (const hipComplex*)beta, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZhemmStridedBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int64_t n, int64_t k, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, const hipblasDoubleComplex* B, int64_t ldb, hipblasStride strideB, const hipblasDoubleComplex* beta, hipblasDoubleComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasZhemmStridedBatched_64(handle, side, uplo, n, k, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, (const hipDoubleComplex*)beta, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // trmm hipblasStatus_t hipblasCtrmmCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* B, int ldb, hipblasComplex* C, int ldc) { 
return hipblasCtrmm(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)B, ldb, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZtrmmCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex* C, int ldc) { return hipblasZtrmm(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, (hipDoubleComplex*)C, ldc); } // trmm_batched hipblasStatus_t hipblasCtrmmBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, const hipblasComplex* const B[], int ldb, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCtrmmBatched(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZtrmmBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* const B[], int ldb, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZtrmmBatched(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, (hipDoubleComplex* const*)C, ldc, batchCount); } // trmm_strided_batched hipblasStatus_t hipblasCtrmmStridedBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t 
transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* B, int ldb, hipblasStride strideB, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCtrmmStridedBatched(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)B, ldb, strideB, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZtrmmStridedBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, hipblasDoubleComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasZtrmmStridedBatched(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // trmm_64 hipblasStatus_t hipblasCtrmmCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* B, int64_t ldb, hipblasComplex* C, int64_t ldc) { return hipblasCtrmm_64(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)B, ldb, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZtrmmCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* B, int64_t ldb, hipblasDoubleComplex* C, int64_t ldc) { return 
hipblasZtrmm_64(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)B, ldb, (hipDoubleComplex*)C, ldc); } // trmm_batched_64 hipblasStatus_t hipblasCtrmmBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, const hipblasComplex* const B[], int64_t ldb, hipblasComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasCtrmmBatched_64(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex* const*)B, ldb, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZtrmmBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, const hipblasDoubleComplex* const B[], int64_t ldb, hipblasDoubleComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasZtrmmBatched_64(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex* const*)B, ldb, (hipDoubleComplex* const*)C, ldc, batchCount); } // trmm_strided_batched_64 hipblasStatus_t hipblasCtrmmStridedBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA, const hipblasComplex* B, int64_t ldb, hipblasStride strideB, hipblasComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasCtrmmStridedBatched_64(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, 
(const hipComplex*)B, ldb, strideB, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZtrmmStridedBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, const hipblasDoubleComplex* B, int64_t ldb, hipblasStride strideB, hipblasDoubleComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasZtrmmStridedBatched_64(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)B, ldb, strideB, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // trsm hipblasStatus_t hipblasCtrsmCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasComplex* B, int ldb) { return hipblasCtrsm(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (hipComplex*)B, ldb); } hipblasStatus_t hipblasZtrsmCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasDoubleComplex* B, int ldb) { return hipblasZtrsm(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb); } // trsm_batched hipblasStatus_t hipblasCtrsmBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* const A[], int lda, hipblasComplex* const B[], int ldb, int batch_count) { return hipblasCtrsmBatched(handle, side, uplo, transA, diag, m, 
n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (hipComplex* const*)B, ldb, batch_count); } hipblasStatus_t hipblasZtrsmBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, hipblasDoubleComplex* const B[], int ldb, int batch_count) { return hipblasZtrsmBatched(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (hipDoubleComplex* const*)B, ldb, batch_count); } // trsm_strided_batched hipblasStatus_t hipblasCtrsmStridedBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, hipblasComplex* B, int ldb, hipblasStride strideB, int batch_count) { return hipblasCtrsmStridedBatched(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (hipComplex*)B, ldb, strideB, batch_count); } hipblasStatus_t hipblasZtrsmStridedBatchedCast(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, hipblasDoubleComplex* B, int ldb, hipblasStride strideB, int batch_count) { return hipblasZtrsmStridedBatched(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (hipDoubleComplex*)B, ldb, strideB, batch_count); } // trsm_64 hipblasStatus_t hipblasCtrsmCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasComplex* B, 
int64_t ldb) { return hipblasCtrsm_64(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (hipComplex*)B, ldb); } hipblasStatus_t hipblasZtrsmCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasDoubleComplex* B, int64_t ldb) { return hipblasZtrsm_64(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb); } // trsm_batched_64 hipblasStatus_t hipblasCtrsmBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, hipblasComplex* const B[], int64_t ldb, int64_t batch_count) { return hipblasCtrsmBatched_64(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (hipComplex* const*)B, ldb, batch_count); } hipblasStatus_t hipblasZtrsmBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, hipblasDoubleComplex* const B[], int64_t ldb, int64_t batch_count) { return hipblasZtrsmBatched_64(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (hipDoubleComplex* const*)B, ldb, batch_count); } // trsm_strided_batched_64 hipblasStatus_t hipblasCtrsmStridedBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA, hipblasComplex* B, 
int64_t ldb, hipblasStride strideB, int64_t batch_count) { return hipblasCtrsmStridedBatched_64(handle, side, uplo, transA, diag, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (hipComplex*)B, ldb, strideB, batch_count); } hipblasStatus_t hipblasZtrsmStridedBatchedCast_64(hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, hipblasDoubleComplex* B, int64_t ldb, hipblasStride strideB, int64_t batch_count) { return hipblasZtrsmStridedBatched_64(handle, side, uplo, transA, diag, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (hipDoubleComplex*)B, ldb, strideB, batch_count); } // geam hipblasStatus_t hipblasCgeamCast(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, const hipblasComplex* beta, const hipblasComplex* B, int ldb, hipblasComplex* C, int ldc) { return hipblasCgeam(handle, transA, transB, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)beta, (const hipComplex*)B, ldb, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZgeamCast(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* B, int ldb, hipblasDoubleComplex* C, int ldc) { return hipblasZgeam(handle, transA, transB, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)beta, (const hipDoubleComplex*)B, ldb, (hipDoubleComplex*)C, ldc); } // geam_batched hipblasStatus_t hipblasCgeamBatchedCast(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const 
hipblasComplex* const A[], int lda, const hipblasComplex* beta, const hipblasComplex* const B[], int ldb, hipblasComplex* const C[], int ldc, int batchCount) { return hipblasCgeamBatched(handle, transA, transB, m, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex*)beta, (const hipComplex* const*)B, ldb, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZgeamBatchedCast(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* const B[], int ldb, hipblasDoubleComplex* const C[], int ldc, int batchCount) { return hipblasZgeamBatched(handle, transA, transB, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex*)beta, (const hipDoubleComplex* const*)B, ldb, (hipDoubleComplex* const*)C, ldc, batchCount); } // geam_strided_batched hipblasStatus_t hipblasCgeamStridedBatchedCast(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex* alpha, const hipblasComplex* A, int lda, hipblasStride strideA, const hipblasComplex* beta, const hipblasComplex* B, int ldb, hipblasStride strideB, hipblasComplex* C, int ldc, hipblasStride strideC, int batchCount) { return hipblasCgeamStridedBatched(handle, transA, transB, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)beta, (const hipComplex*)B, ldb, strideB, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZgeamStridedBatchedCast(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int lda, hipblasStride strideA, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* B, int ldb, hipblasStride strideB, hipblasDoubleComplex* C, int ldc, 
hipblasStride strideC, int batchCount) { return hipblasZgeamStridedBatched(handle, transA, transB, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)beta, (const hipDoubleComplex*)B, ldb, strideB, (hipDoubleComplex*)C, ldc, strideC, batchCount); } // geam_64 hipblasStatus_t hipblasCgeamCast_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, const hipblasComplex* beta, const hipblasComplex* B, int64_t ldb, hipblasComplex* C, int64_t ldc) { return hipblasCgeam_64(handle, transA, transB, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, (const hipComplex*)beta, (const hipComplex*)B, ldb, (hipComplex*)C, ldc); } hipblasStatus_t hipblasZgeamCast_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* B, int64_t ldb, hipblasDoubleComplex* C, int64_t ldc) { return hipblasZgeam_64(handle, transA, transB, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, (const hipDoubleComplex*)beta, (const hipDoubleComplex*)B, ldb, (hipDoubleComplex*)C, ldc); } // geam_batched_64 hipblasStatus_t hipblasCgeamBatchedCast_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* const A[], int64_t lda, const hipblasComplex* beta, const hipblasComplex* const B[], int64_t ldb, hipblasComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasCgeamBatched_64(handle, transA, transB, m, n, (const hipComplex*)alpha, (const hipComplex* const*)A, lda, (const hipComplex*)beta, (const hipComplex* const*)B, ldb, (hipComplex* const*)C, ldc, batchCount); } hipblasStatus_t hipblasZgeamBatchedCast_64(hipblasHandle_t 
handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* const A[], int64_t lda, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* const B[], int64_t ldb, hipblasDoubleComplex* const C[], int64_t ldc, int64_t batchCount) { return hipblasZgeamBatched_64(handle, transA, transB, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex* const*)A, lda, (const hipDoubleComplex*)beta, (const hipDoubleComplex* const*)B, ldb, (hipDoubleComplex* const*)C, ldc, batchCount); } // geam_strided_batched_64 hipblasStatus_t hipblasCgeamStridedBatchedCast_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* A, int64_t lda, hipblasStride strideA, const hipblasComplex* beta, const hipblasComplex* B, int64_t ldb, hipblasStride strideB, hipblasComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasCgeamStridedBatched_64(handle, transA, transB, m, n, (const hipComplex*)alpha, (const hipComplex*)A, lda, strideA, (const hipComplex*)beta, (const hipComplex*)B, ldb, strideB, (hipComplex*)C, ldc, strideC, batchCount); } hipblasStatus_t hipblasZgeamStridedBatchedCast_64(hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* A, int64_t lda, hipblasStride strideA, const hipblasDoubleComplex* beta, const hipblasDoubleComplex* B, int64_t ldb, hipblasStride strideB, hipblasDoubleComplex* C, int64_t ldc, hipblasStride strideC, int64_t batchCount) { return hipblasZgeamStridedBatched_64(handle, transA, transB, m, n, (const hipDoubleComplex*)alpha, (const hipDoubleComplex*)A, lda, strideA, (const hipDoubleComplex*)beta, (const hipDoubleComplex*)B, ldb, strideB, (hipDoubleComplex*)C, ldc, strideC, batchCount); } #ifdef __HIP_PLATFORM_SOLVER__ // getrf 
// Single-precision complex getrf wrapper: re-types the hipblasComplex matrix
// pointer as hipComplex* and forwards every argument to hipblasCgetrf.
hipblasStatus_t hipblasCgetrfCast(
    hipblasHandle_t handle, const int n, hipblasComplex* A, const int lda, int* ipiv, int* info)
{
    hipComplex* const hipA = (hipComplex*)A;
    return hipblasCgetrf(handle, n, hipA, lda, ipiv, info);
}

// Double-precision complex counterpart of hipblasCgetrfCast; forwards to hipblasZgetrf.
hipblasStatus_t hipblasZgetrfCast(hipblasHandle_t       handle,
                                  const int             n,
                                  hipblasDoubleComplex* A,
                                  const int             lda,
                                  int*                  ipiv,
                                  int*                  info)
{
    hipDoubleComplex* const hipA = (hipDoubleComplex*)A;
    return hipblasZgetrf(handle, n, hipA, lda, ipiv, info);
}

// getrf_batched
// Re-types the array of hipblasComplex matrix pointers as an array of
// hipComplex pointers and forwards to hipblasCgetrfBatched.
hipblasStatus_t hipblasCgetrfBatchedCast(hipblasHandle_t       handle,
                                         const int             n,
                                         hipblasComplex* const A[],
                                         const int             lda,
                                         int*                  ipiv,
                                         int*                  info,
                                         const int             batchCount)
{
    hipComplex* const* const hipA = (hipComplex* const*)A;
    return hipblasCgetrfBatched(handle, n, hipA, lda, ipiv, info, batchCount);
}

// Double-precision complex counterpart; forwards to hipblasZgetrfBatched.
hipblasStatus_t hipblasZgetrfBatchedCast(hipblasHandle_t             handle,
                                         const int                   n,
                                         hipblasDoubleComplex* const A[],
                                         const int                   lda,
                                         int*                        ipiv,
                                         int*                        info,
                                         const int                   batchCount)
{
    hipDoubleComplex* const* const hipA = (hipDoubleComplex* const*)A;
    return hipblasZgetrfBatched(handle, n, hipA, lda, ipiv, info, batchCount);
}

// getrf_strided_batched
// Re-types the hipblasComplex matrix pointer and forwards (strides unchanged)
// to hipblasCgetrfStridedBatched.
hipblasStatus_t hipblasCgetrfStridedBatchedCast(hipblasHandle_t     handle,
                                                const int           n,
                                                hipblasComplex*     A,
                                                const int           lda,
                                                const hipblasStride strideA,
                                                int*                ipiv,
                                                const hipblasStride strideP,
                                                int*                info,
                                                const int           batchCount)
{
    hipComplex* const hipA = (hipComplex*)A;
    return hipblasCgetrfStridedBatched(
        handle, n, hipA, lda, strideA, ipiv, strideP, info, batchCount);
}

// Double-precision complex counterpart; forwards to hipblasZgetrfStridedBatched.
hipblasStatus_t hipblasZgetrfStridedBatchedCast(hipblasHandle_t       handle,
                                                const int             n,
                                                hipblasDoubleComplex* A,
                                                const int             lda,
                                                const hipblasStride   strideA,
                                                int*                  ipiv,
                                                const hipblasStride   strideP,
                                                int*                  info,
                                                const int             batchCount)
{
    hipDoubleComplex* const hipA = (hipDoubleComplex*)A;
    return hipblasZgetrfStridedBatched(
        handle, n, hipA, lda, strideA, ipiv, strideP, info, batchCount);
}

// getrs
// Re-types both hipblasComplex matrix pointers (A and B) and forwards to hipblasCgetrs.
hipblasStatus_t hipblasCgetrsCast(hipblasHandle_t          handle,
                                  const hipblasOperation_t trans,
                                  const int                n,
                                  const int                nrhs,
                                  hipblasComplex*          A,
                                  const int                lda,
                                  const int*               ipiv,
                                  hipblasComplex*          B,
                                  const int                ldb,
                                  int*                     info)
{
    hipComplex* const hipA = (hipComplex*)A;
    hipComplex* const hipB = (hipComplex*)B;
    return hipblasCgetrs(handle, trans, n, nrhs, hipA, lda, ipiv, hipB, ldb, info);
}
hipblasStatus_t hipblasZgetrsCast(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, const int* ipiv, hipblasDoubleComplex* B, const int ldb, int* info) { return hipblasZgetrs( handle, trans, n, nrhs, (hipDoubleComplex*)A, lda, ipiv, (hipDoubleComplex*)B, ldb, info); } // getrs_batched hipblasStatus_t hipblasCgetrsBatchedCast(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* const A[], const int lda, const int* ipiv, hipblasComplex* const B[], const int ldb, int* info, const int batchCount) { return hipblasCgetrsBatched(handle, trans, n, nrhs, (hipComplex* const*)A, lda, ipiv, (hipComplex* const*)B, ldb, info, batchCount); } hipblasStatus_t hipblasZgetrsBatchedCast(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, const int* ipiv, hipblasDoubleComplex* const B[], const int ldb, int* info, const int batchCount) { return hipblasZgetrsBatched(handle, trans, n, nrhs, (hipDoubleComplex* const*)A, lda, ipiv, (hipDoubleComplex* const*)B, ldb, info, batchCount); } // getrs_strided_batched hipblasStatus_t hipblasCgetrsStridedBatchedCast(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, hipblasComplex* B, const int ldb, const hipblasStride strideB, int* info, const int batchCount) { return hipblasCgetrsStridedBatched(handle, trans, n, nrhs, (hipComplex*)A, lda, strideA, ipiv, strideP, (hipComplex*)B, ldb, strideB, info, batchCount); } hipblasStatus_t hipblasZgetrsStridedBatchedCast(hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, const int* ipiv, const hipblasStride strideP, hipblasDoubleComplex* B, const int ldb, const 
hipblasStride strideB, int* info, const int batchCount) { return hipblasZgetrsStridedBatched(handle, trans, n, nrhs, (hipDoubleComplex*)A, lda, strideA, ipiv, strideP, (hipDoubleComplex*)B, ldb, strideB, info, batchCount); } // getri_batched hipblasStatus_t hipblasCgetriBatchedCast(hipblasHandle_t handle, const int n, hipblasComplex* const A[], const int lda, int* ipiv, hipblasComplex* const C[], const int ldc, int* info, const int batchCount) { return hipblasCgetriBatched( handle, n, (hipComplex* const*)A, lda, ipiv, (hipComplex* const*)C, ldc, info, batchCount); } hipblasStatus_t hipblasZgetriBatchedCast(hipblasHandle_t handle, const int n, hipblasDoubleComplex* const A[], const int lda, int* ipiv, hipblasDoubleComplex* const C[], const int ldc, int* info, const int batchCount) { return hipblasZgetriBatched(handle, n, (hipDoubleComplex* const*)A, lda, ipiv, (hipDoubleComplex* const*)C, ldc, info, batchCount); } // geqrf hipblasStatus_t hipblasCgeqrfCast(hipblasHandle_t handle, const int m, const int n, hipblasComplex* A, const int lda, hipblasComplex* ipiv, int* info) { return hipblasCgeqrf(handle, m, n, (hipComplex*)A, lda, (hipComplex*)ipiv, info); } hipblasStatus_t hipblasZgeqrfCast(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* A, const int lda, hipblasDoubleComplex* ipiv, int* info) { return hipblasZgeqrf(handle, m, n, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)ipiv, info); } // geqrf_batched hipblasStatus_t hipblasCgeqrfBatchedCast(hipblasHandle_t handle, const int m, const int n, hipblasComplex* const A[], const int lda, hipblasComplex* const ipiv[], int* info, const int batchCount) { return hipblasCgeqrfBatched( handle, m, n, (hipComplex* const*)A, lda, (hipComplex* const*)ipiv, info, batchCount); } hipblasStatus_t hipblasZgeqrfBatchedCast(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const ipiv[], int* info, const int batchCount) { return 
hipblasZgeqrfBatched(handle, m, n, (hipDoubleComplex* const*)A, lda, (hipDoubleComplex* const*)ipiv, info, batchCount); } // geqrf_strided_batched hipblasStatus_t hipblasCgeqrfStridedBatchedCast(hipblasHandle_t handle, const int m, const int n, hipblasComplex* A, const int lda, const hipblasStride strideA, hipblasComplex* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasCgeqrfStridedBatched( handle, m, n, (hipComplex*)A, lda, strideA, (hipComplex*)ipiv, strideP, info, batchCount); } hipblasStatus_t hipblasZgeqrfStridedBatchedCast(hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, hipblasDoubleComplex* ipiv, const hipblasStride strideP, int* info, const int batchCount) { return hipblasZgeqrfStridedBatched(handle, m, n, (hipDoubleComplex*)A, lda, strideA, (hipDoubleComplex*)ipiv, strideP, info, batchCount); } // gels hipblasStatus_t hipblasCgelsCast(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* A, const int lda, hipblasComplex* B, const int ldb, int* info, int* deviceInfo) { return hipblasCgels( handle, trans, m, n, nrhs, (hipComplex*)A, lda, (hipComplex*)B, ldb, info, deviceInfo); } hipblasStatus_t hipblasZgelsCast(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, hipblasDoubleComplex* B, const int ldb, int* info, int* deviceInfo) { return hipblasZgels(handle, trans, m, n, nrhs, (hipDoubleComplex*)A, lda, (hipDoubleComplex*)B, ldb, info, deviceInfo); } // gelsBatched hipblasStatus_t hipblasCgelsBatchedCast(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* const A[], const int lda, hipblasComplex* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount) { return hipblasCgelsBatched(handle, trans, m, n, nrhs, (hipComplex* const*)A, lda, (hipComplex* 
const*)B, ldb, info, deviceInfo, batchCount); } hipblasStatus_t hipblasZgelsBatchedCast(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* const A[], const int lda, hipblasDoubleComplex* const B[], const int ldb, int* info, int* deviceInfo, const int batchCount) { return hipblasZgelsBatched(handle, trans, m, n, nrhs, (hipDoubleComplex* const*)A, lda, (hipDoubleComplex* const*)B, ldb, info, deviceInfo, batchCount); } // gelsStridedBatched hipblasStatus_t hipblasCgelsStridedBatchedCast(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex* A, const int lda, const hipblasStride strideA, hipblasComplex* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount) { return hipblasCgelsStridedBatched(handle, trans, m, n, nrhs, (hipComplex*)A, lda, strideA, (hipComplex*)B, ldb, strideB, info, deviceInfo, batchCount); } hipblasStatus_t hipblasZgelsStridedBatchedCast(hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex* A, const int lda, const hipblasStride strideA, hipblasDoubleComplex* B, const int ldb, const hipblasStride strideB, int* info, int* deviceInfo, const int batchCount) { return hipblasZgelsStridedBatched(handle, trans, m, n, nrhs, (hipDoubleComplex*)A, lda, strideA, (hipDoubleComplex*)B, ldb, strideB, info, deviceInfo, batchCount); } #endif // solver #endif // HIPBLAS_V2 hipBLAS-rocm-6.4.3/clients/common/host_alloc.cpp000066400000000000000000000136421500474223100214740ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #ifdef WIN32 #include #else #include #include #include #endif #include #include #include #include "hipblas_test.hpp" #include "host_alloc.hpp" // light weight memory tracking for threshold limit on total use static size_t mem_used{0}; static std::map mem_allocated; static std::mutex mem_mutex; inline void alloc_ptr_use(void* ptr, size_t size) { std::lock_guard lock(mem_mutex); if(ptr) { mem_allocated[ptr] = size; mem_used += size; } } inline void free_ptr_use(void* ptr) { std::lock_guard lock(mem_mutex); if(ptr && mem_allocated[ptr]) { mem_used -= mem_allocated[ptr]; mem_allocated.erase(ptr); } } size_t host_bytes_allocated() { std::lock_guard lock(mem_mutex); return mem_used; } //! //! @brief Memory free helper. Returns kB or -1 if unknown. //! 
ptrdiff_t host_bytes_available() { #ifdef WIN32 MEMORYSTATUSEX status; status.dwLength = sizeof(status); GlobalMemoryStatusEx(&status); return (ptrdiff_t)status.ullAvailPhys; #else const int BUF_MAX = 1024; char buf[BUF_MAX]; ptrdiff_t n_bytes = -1; // unknown FILE* fp = popen("cat /proc/meminfo", "r"); if(fp == NULL) { return n_bytes; } static const char* mem_token = "MemFree"; static auto* mem_free_type = getenv("HIPBLAS_CLIENT_ALLOC_AVAILABLE"); if(mem_free_type) { mem_token = "MemAvail"; // MemAvailable } int mem_token_len = strlen(mem_token); while(fgets(buf, BUF_MAX, fp) != NULL) { // set env HIPBLAS_CLIENT_ALLOC_AVAILABLE to use MemAvailable if too many SKIPS occur if(!strncmp(buf, mem_token, mem_token_len)) { sscanf(buf, "%*s %td", &n_bytes); // kB assumed as 3rd column and ignored n_bytes *= 1024; break; } } int status = pclose(fp); if(status == -1) { return -1; } else { return n_bytes; } #endif } inline bool host_mem_safe(size_t n_bytes) { #if defined(HIPBLAS_BENCH) return true; // roll out to hipblas-bench when CI does perf testing #else static auto* no_alloc_check = getenv("HIPBLAS_CLIENT_NO_ALLOC_CHECK"); if(no_alloc_check) { return true; } constexpr size_t threshold = 100 * 1024 * 1024; // 100 MB static size_t client_ram_limit = 0; static int once = [&] { auto* alloc_limit = getenv("HIPBLAS_CLIENT_RAM_GB_LIMIT"); if(alloc_limit) { size_t mem_limit; client_ram_limit = sscanf(alloc_limit, "%zu", &mem_limit) == 1 ? 
mem_limit : 0; client_ram_limit <<= 30; // B to GB } return 0; }(); if(n_bytes > threshold) { if(client_ram_limit) { if(host_bytes_allocated() + n_bytes > client_ram_limit) { std::cout << "Warning: skipped allocating " << n_bytes << " bytes (" << (n_bytes >> 30) << " GB) as total would be more than client limit (" << (client_ram_limit >> 30) << " GB)" << std::endl; return false; } } ptrdiff_t avail_bytes = host_bytes_available(); // negative if unknown if(avail_bytes >= 0 && n_bytes > avail_bytes) { std::cout << "Warning: skipped allocating " << n_bytes << " bytes (" << (n_bytes >> 30) << " GB) as more than free memory (" << (avail_bytes >> 30) << " GB)" << std::endl; // we don't try if it looks to push load into swap return false; } } return true; #endif } void* host_malloc(size_t size) { if(host_mem_safe(size)) { void* ptr = malloc(size); static int value = -1; static auto once = false; if(!once) { auto* alloc_byte_str = getenv("HIPBLAS_CLIENT_ALLOC_FILL_HEX_BYTE"); if(alloc_byte_str) { value = strtol(alloc_byte_str, nullptr, 16); // hex } once = true; } if(value != -1 && ptr) memset(ptr, value, size); alloc_ptr_use(ptr, size); return ptr; } else return nullptr; } void* host_calloc(size_t nmemb, size_t size) { if(host_mem_safe(nmemb * size)) { void* ptr = calloc(nmemb, size); alloc_ptr_use(ptr, size); return ptr; } else return nullptr; } void host_free(void* ptr) { free(ptr); free_ptr_use(ptr); } hipBLAS-rocm-6.4.3/clients/common/near.cpp000066400000000000000000000406321500474223100202710ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * * ************************************************************************ */

#include "near.h"
#include "hipblas.h"
#include "host_vector.hpp"
#include "utility.h"

/* ========================================Gtest Unit Check
 * ==================================================== */

/*! \brief Template: gtest unit compare two matrices float/double/complex */
// Do not put a wrapper over ASSERT_FLOAT_EQ: a gtest ASSERT exits the current function, NOT the
// test case, so with a wrapper the comparison loop would keep going.

#ifndef GOOGLE_TEST

// Without GOOGLE_TEST both checks expand to nothing.
#define NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, err, NEAR_ASSERT)
#define NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, err, NEAR_ASSERT)

#else

// Strided-batched comparison: visits element i + j * lda + k * strideA for every batch k,
// column j and row i. If the reference value is NaN the result must be NaN as well; otherwise
// NEAR_ASSERT(reference, result, err) is applied.
#define NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, err, NEAR_ASSERT) \
    do \
    { \
        for(size_t k = 0; k < batch_count; k++) \
            for(size_t j = 0; j < N; j++) \
                for(size_t i = 0; i < M; i++) \
                    if(hipblas_isnan(hCPU[i + j * lda + k * strideA])) \
                    { \
                        ASSERT_TRUE(hipblas_isnan(hGPU[i + j * lda + k * strideA])); \
                    } \
                    else \
                    { \
                        NEAR_ASSERT(hCPU[i + j * lda + k * strideA], \
                                    hGPU[i + j * lda + k * strideA], \
                                    err); \
                    } \
    } while(0)

// Batched comparison: same as NEAR_CHECK, but batch k is reached through hCPU[k] / hGPU[k] and
// the element inside the batch is i + j * lda.
#define NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, err, NEAR_ASSERT) \
    do \
    { \
        for(size_t k = 0; k < batch_count; k++) \
            for(size_t j = 0; j < N; j++) \
                for(size_t i = 0; i < M; i++) \
                    if(hipblas_isnan(hCPU[k][i + j * lda])) \
                    { \
                        ASSERT_TRUE(hipblas_isnan(hGPU[k][i + j * lda])); \
                    } \
                    else \
                    { \
                        NEAR_ASSERT(hCPU[k][i + j * lda], hGPU[k][i + j * lda], err); \
                    } \
    } while(0)

#endif

// Per-type element comparisons plugged into the NEAR_ASSERT parameter of the macros above.
// Half and bfloat16 values are converted to float before ASSERT_NEAR.
#define NEAR_ASSERT_HALF(a, b, err) ASSERT_NEAR(half_to_float(a), half_to_float(b), err)

#define NEAR_ASSERT_BF16(a, b, err) ASSERT_NEAR(bfloat16_to_float(a), bfloat16_to_float(b), err)

// Complex values: real and imaginary parts are compared separately against the same tolerance.
// The arguments are copied into temporaries first, so each is evaluated once.
#define NEAR_ASSERT_COMPLEX(a, b, err) \
    do \
    { \
        auto ta = (a), tb = (b); \
        ASSERT_NEAR(ta.real(), tb.real(), err); \
        ASSERT_NEAR(ta.imag(), tb.imag(), err); \
    } while(0)

// ---------------------------------------------------------------------------------------------
// Single matrix: hCPU and hGPU are M x N with leading dimension lda (batch_count 1, stride 0).
// The complex variants multiply abs_error by sqrthalf (declared outside this file; presumably
// sqrt(1/2) -- TODO confirm) before comparing the components.
// ---------------------------------------------------------------------------------------------

template <>
void near_check_general(
    int64_t M, int64_t N, int64_t lda, int32_t* hCPU, int32_t* hGPU, double abs_error)
{
    NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(
    int64_t M, int64_t N, int64_t lda, float* hCPU, float* hGPU, double abs_error)
{
    NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(
    int64_t M, int64_t N, int64_t lda, double* hCPU, double* hGPU, double abs_error)
{
    NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(
    int64_t M, int64_t N, int64_t lda, hipblasHalf* hCPU, hipblasHalf* hGPU, double abs_error)
{
    NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

template <>
void near_check_general(int64_t          M,
                        int64_t          N,
                        int64_t          lda,
                        hipblasBfloat16* hCPU,
                        hipblasBfloat16* hGPU,
                        double           abs_error)
{
    NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(
    int64_t M, int64_t N, int64_t lda, hipblasComplex* hCPU, hipblasComplex* hGPU, double abs_error)
{
    abs_error *= sqrthalf;
    NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX);
}

template <>
void near_check_general(int64_t               M,
                        int64_t               N,
                        int64_t               lda,
                        hipblasDoubleComplex* hCPU,
                        hipblasDoubleComplex* hGPU,
                        double                abs_error)
{
    abs_error *= sqrthalf;
    NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX);
}

// ---------------------------------------------------------------------------------------------
// Strided batched: batch k starts strideA elements after batch k - 1 in the same buffer.
// ---------------------------------------------------------------------------------------------

template <>
void near_check_general(int64_t       M,
                        int64_t       N,
                        int64_t       batch_count,
                        int64_t       lda,
                        hipblasStride strideA,
                        int32_t*      hCPU,
                        int32_t*      hGPU,
                        double        abs_error)
{
    NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(int64_t       M,
                        int64_t       N,
                        int64_t       batch_count,
                        int64_t       lda,
                        hipblasStride strideA,
                        float*        hCPU,
                        float*        hGPU,
                        double        abs_error)
{
    NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(int64_t       M,
                        int64_t       N,
                        int64_t       batch_count,
                        int64_t       lda,
                        hipblasStride strideA,
                        double*       hCPU,
                        double*       hGPU,
                        double        abs_error)
{
    NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(int64_t       M,
                        int64_t       N,
                        int64_t       batch_count,
                        int64_t       lda,
                        hipblasStride strideA,
                        hipblasHalf*  hCPU,
                        hipblasHalf*  hGPU,
                        double        abs_error)
{
    NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

template <>
void near_check_general(int64_t          M,
                        int64_t          N,
                        int64_t          batch_count,
                        int64_t          lda,
                        hipblasStride    strideA,
                        hipblasBfloat16* hCPU,
                        hipblasBfloat16* hGPU,
                        double           abs_error)
{
    NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(int64_t         M,
                        int64_t         N,
                        int64_t         batch_count,
                        int64_t         lda,
                        hipblasStride   strideA,
                        hipblasComplex* hCPU,
                        hipblasComplex* hGPU,
                        double          abs_error)
{
    abs_error *= sqrthalf;
    NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX);
}

template <>
void near_check_general(int64_t               M,
                        int64_t               N,
                        int64_t               batch_count,
                        int64_t               lda,
                        hipblasStride         strideA,
                        hipblasDoubleComplex* hCPU,
                        hipblasDoubleComplex* hGPU,
                        double                abs_error)
{
    abs_error *= sqrthalf;
    NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX);
}

// ---------------------------------------------------------------------------------------------
// Batched, one host_vector per batch.
// NOTE(review): the element type of each host_vector parameter below is missing in this copy of
// the file (the template argument list appears to have been lost in extraction), so the seven
// overloads are told apart here only by the assert macro they use. One of them also carries no
// "template <>" prefix. Restore the types from the upstream file before building.
// ---------------------------------------------------------------------------------------------

template <>
void near_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[],
                        double      abs_error)
{
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

template <>
void near_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[],
                        double      abs_error)
{
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[],
                        double      abs_error)
{
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

void near_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[],
                        double      abs_error)
{
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[],
                        double      abs_error)
{
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[],
                        double      abs_error)
{
    abs_error *= sqrthalf;
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX);
}

template <>
void near_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[],
                        double      abs_error)
{
    abs_error *= sqrthalf;
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX);
}

// ---------------------------------------------------------------------------------------------
// Batched, one raw pointer per batch.
// ---------------------------------------------------------------------------------------------

template <>
void near_check_general(int64_t      M,
                        int64_t      N,
                        int64_t      batch_count,
                        int64_t      lda,
                        hipblasHalf* hCPU[],
                        hipblasHalf* hGPU[],
                        double       abs_error)
{
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

template <>
void near_check_general(int64_t          M,
                        int64_t          N,
                        int64_t          batch_count,
                        int64_t          lda,
                        hipblasBfloat16* hCPU[],
                        hipblasBfloat16* hGPU[],
                        double           abs_error)
{
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(int64_t  M,
                        int64_t  N,
                        int64_t  batch_count,
                        int64_t  lda,
                        int32_t* hCPU[],
                        int32_t* hGPU[],
                        double   abs_error)
{
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(int64_t M,
                        int64_t N,
                        int64_t batch_count,
                        int64_t lda,
                        float*  hCPU[],
                        float*  hGPU[],
                        double  abs_error)
{
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(int64_t M,
                        int64_t N,
                        int64_t batch_count,
                        int64_t lda,
                        double* hCPU[],
                        double* hGPU[],
                        double  abs_error)
{
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, ASSERT_NEAR);
}

template <>
void near_check_general(int64_t         M,
                        int64_t         N,
                        int64_t         batch_count,
                        int64_t         lda,
                        hipblasComplex* hCPU[],
                        hipblasComplex* hGPU[],
                        double          abs_error)
{
    abs_error *= sqrthalf;
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX);
}

template <>
void near_check_general(int64_t               M,
                        int64_t               N,
                        int64_t               batch_count,
                        int64_t               lda,
                        hipblasDoubleComplex* hCPU[],
                        hipblasDoubleComplex* hGPU[],
                        double                abs_error)
{
    abs_error *= sqrthalf;
    NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_COMPLEX);
}
hipBLAS-rocm-6.4.3/clients/common/norm.cpp000066400000000000000000000315071500474223100203200ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * * ************************************************************************ */
#include "norm.h"
#include "cblas.h"
#include "hipblas.h"
#include "lapack_utilities.hpp"
#include
/* =====================================================================
    README: Norm check: norm(A-B)/norm(A), evaluate relative error
            Numerically, it is recommended by lapack.
Call lapack fortran routines that do not exist in cblas library.
    No special header is required. But need to declare function prototype
    All the functions are fortran and should append underscore (_) while declaring prototype and
    calling.
    xlange and xaxpy prototype are like following
=================================================================== */

#ifdef __cplusplus
extern "C" {
#endif

// float slange_(char* norm_type, int* m, int* n, float* A, int* lda, float* work);
// double dlange_(char* norm_type, int* m, int* n, double* A, int* lda, double* work);
// float clange_(char* norm_type, int* m, int* n, hipblasComplex* A, int* lda, float* work);
// double zlange_(char* norm_type, int* m, int* n, hipblasDoubleComplex* A, int* lda, double* work);

// float slansy_(char* norm_type, char* uplo, int* n, float* A, int* lda, float* work);
// double dlansy_(char* norm_type, char* uplo, int* n, double* A, int* lda, double* work);
// float clanhe_(char* norm_type, char* uplo, int* n, hipblasComplex* A, int* lda, float* work);
// double zlanhe_(char* norm_type, char* uplo, int* n, hipblasDoubleComplex* A, int* lda, double*
// work);

// void m_axpy_64(int* n, float* alpha, float* x, int* incx, float* y, int* incy);
// void m_axpy_64(int* n, double* alpha, double* x, int* incx, double* y, int* incy);
// void m_axpy_64(
// int* n, hipblasComplex* alpha, hipblasComplex* x, int* incx, hipblasComplex* y, int* incy);
// void m_axpy_64(int* n,
// hipblasDoubleComplex* alpha,
// hipblasDoubleComplex* x,
// int* incx,
// hipblasDoubleComplex* y,
// int* incy);

#ifdef __cplusplus
}
#endif

// In-file axpy with 64-bit sizes: y[i] = (*alpha) * x[i] + y[i] for N elements spaced incx / incy
// apart. For a negative increment the start offset is inc * (1 - N), so that index i still stays
// inside the vector while walking it from the far end.
// NOTE(review): the template parameter list (T is used below) is missing in this copy of the
// file; it appears to have been lost in extraction.
template void m_axpy_64(int64_t N, T* alpha, T* x, int64_t incx, T* y, int64_t incy)
{
    int64_t x_offset = incx >= 0 ? 0 : incx * (1 - N);
    int64_t y_offset = incy >= 0 ? 0 : incy * (1 - N);
    for(int64_t i = 0; i < N; i++)
    {
        y[y_offset + i * incy] = (*alpha) * x[x_offset + i * incx] + y[y_offset + i * incy];
    }
}

/* ============================Norm Check for General Matrix: float/double/complex template
 * specialization ======================================= */

/*! \brief compare the norm error of two matrices hCPU & hGPU */
// Each specialization below returns norm(hGPU - hCPU) / norm(hCPU), using lapack_xlange for the
// norm. hGPU is overwritten with the difference (m_axpy_64 with alpha = -1 over lda * N
// elements). There is no guard for a zero reference norm.
// NOTE(review): the element type of every host_vector declared in this file is missing in this
// copy (template arguments lost in extraction).

template <>
double norm_check_general(
    char norm_type, int64_t M, int64_t N, int64_t lda, float* hCPU, float* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    host_vector work(std::max(int64_t(1), M));
    int64_t     incx  = 1;
    float       alpha = -1.0f;
    int64_t     size  = lda * N;

    double cpu_norm = lapack_xlange(norm_type, M, N, hCPU, lda, work.data());
    m_axpy_64(size, &alpha, hCPU, incx, hGPU, incx);
    double error = lapack_xlange(norm_type, M, N, hGPU, lda, work.data()) / cpu_norm;

    return error;
}

template <>
double norm_check_general(
    char norm_type, int64_t M, int64_t N, int64_t lda, double* hCPU, double* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    host_vector work(std::max(int64_t(1), M));
    int64_t     incx  = 1;
    double      alpha = -1.0;
    int64_t     size  = lda * N;

    double cpu_norm = lapack_xlange(norm_type, M, N, hCPU, lda, work.data());
    m_axpy_64(size, &alpha, hCPU, incx, hGPU, incx);
    double error = lapack_xlange(norm_type, M, N, hGPU, lda, work.data()) / cpu_norm;

    return error;
}

template <>
double norm_check_general(
    char norm_type, int64_t M, int64_t N, int64_t lda, hipblasComplex* hCPU, hipblasComplex* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    host_vector    work(std::max(int64_t(1), M));
    int64_t        incx  = 1;
    hipblasComplex alpha = -1.0f;
    int64_t        size  = lda * N;

    double cpu_norm = lapack_xlange(norm_type, M, N, hCPU, lda, work.data());
    m_axpy_64(size, &alpha, hCPU, incx, hGPU, incx);
    double error = lapack_xlange(norm_type, M, N, hGPU, lda, work.data()) / cpu_norm;

    return error;
}

template <>
double norm_check_general(char                  norm_type,
                          int64_t               M,
                          int64_t               N,
                          int64_t               lda,
                          hipblasDoubleComplex* hCPU,
                          hipblasDoubleComplex* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    host_vector          work(std::max(int64_t(1), M));
    int64_t              incx  = 1;
    hipblasDoubleComplex alpha = -1.0;
    int64_t              size  = lda * N;

    double cpu_norm = lapack_xlange(norm_type, M, N, hCPU, lda, work.data());
    m_axpy_64(size, &alpha, hCPU, incx, hGPU, incx);
    double error = lapack_xlange(norm_type, M, N, hGPU, lda, work.data()) / cpu_norm;

    return error;
}

// The half, bfloat16 and int32 variants copy the M x N part of both inputs into temporary
// host_vectors of N * lda elements and forward those to another norm_check_general overload, so
// the caller's hGPU buffer is not modified by these three.
template <>
double norm_check_general(
    char norm_type, int64_t M, int64_t N, int64_t lda, hipblasHalf* hCPU, hipblasHalf* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    host_vector hCPU_double(N * lda);
    host_vector hGPU_double(N * lda);

    for(int64_t i = 0; i < M; i++)
    {
        for(int64_t j = 0; j < N; j++)
        {
            hCPU_double[i + j * lda] = hCPU[i + j * lda];
            hGPU_double[i + j * lda] = hGPU[i + j * lda];
        }
    }

    return norm_check_general(norm_type, M, N, lda, hCPU_double, hGPU_double);
}

template <>
double norm_check_general(
    char norm_type, int64_t M, int64_t N, int64_t lda, hipblasBfloat16* hCPU, hipblasBfloat16* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    host_vector hCPU_double(N * lda);
    host_vector hGPU_double(N * lda);

    for(int64_t i = 0; i < M; i++)
    {
        for(int64_t j = 0; j < N; j++)
        {
            hCPU_double[i + j * lda] = bfloat16_to_float(hCPU[i + j * lda]);
            hGPU_double[i + j * lda] = bfloat16_to_float(hGPU[i + j * lda]);
        }
    }

    return norm_check_general(norm_type, M, N, lda, hCPU_double, hGPU_double);
}

template <>
double norm_check_general(
    char norm_type, int64_t M, int64_t N, int64_t lda, int32_t* hCPU, int32_t* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    host_vector hCPU_float(N * lda);
    host_vector hGPU_float(N * lda);

    for(int64_t i = 0; i < M; i++)
    {
        for(int64_t j = 0; j < N; j++)
        {
            hCPU_float[i + j * lda] = (hCPU[i + j * lda]);
            hGPU_float[i + j * lda] = (hGPU[i + j * lda]);
        }
    }

    return norm_check_general(norm_type, M, N, lda, hCPU_float, hGPU_float);
}

/* ============================Norm Check for Symmetric Matrix: float/double/complex template
 * specialization ======================================= */

/*! \brief compare the norm error of two hermitian/symmetric matrices hCPU & hGPU */
// First overload: copies both N x N inputs into double temporaries, then returns
// norm(hGPU - hCPU) / norm(hCPU) computed on the copies with lapack_xlansy; the caller's buffers
// are left untouched.
// NOTE(review): the template header below is truncated in this copy of the file (it looks like
// an enable_if-style constraint whose "<...>" contents were lost), and the HERM constant is
// declared but not visibly passed on -- presumably it was a template argument of lapack_xlansy.
// Confirm both against the upstream file.
template , int> = 0>
double norm_check_symmetric(char norm_type, char uplo, int64_t N, int64_t lda, T* hCPU, T* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    host_vector work(std::max(int64_t(1), N));
    int64_t     incx  = 1;
    double      alpha = -1.0;
    size_t      size  = N * (size_t)lda;

    host_vector hCPU_double(size);
    host_vector hGPU_double(size);

    for(int64_t i = 0; i < N; i++)
    {
        for(int64_t j = 0; j < N; j++)
        {
            size_t idx       = j + i * (size_t)lda;
            hCPU_double[idx] = double(hCPU[idx]);
            hGPU_double[idx] = double(hGPU[idx]);
        }
    }

    constexpr bool HERM     = false;
    double         cpu_norm = lapack_xlansy(norm_type, uplo, N, hCPU_double.data(), lda, work.data());
    m_axpy_64(size, &alpha, hCPU_double.data(), incx, hGPU_double.data(), incx);
    double error
        = lapack_xlansy(norm_type, uplo, N, hGPU_double.data(), lda, work.data()) / cpu_norm;

    return error;
}

// Second overload: works directly on the inputs (hGPU is overwritten with hGPU - hCPU) and sets
// HERM to true. The same NOTE(review) about the truncated template header applies.
template , int> = 0>
double norm_check_symmetric(char norm_type, char uplo, int64_t N, int64_t lda, T* hCPU, T* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    host_vector work(std::max(int64_t(1), N));
    int64_t     incx  = 1;
    T           alpha = -1.0;
    size_t      size  = (size_t)lda * N;

    constexpr bool HERM     = true;
    double         cpu_norm = lapack_xlansy(norm_type, uplo, N, hCPU, lda, work.data());
    m_axpy_64(size, &alpha, hCPU, incx, hGPU, incx);
    double error = lapack_xlansy(norm_type, uplo, N, hGPU, lda, work.data()) / cpu_norm;

    return error;
}

/*
template <>
double norm_check_symmetric(
    char norm_type, char uplo, int64_t N, int64_t lda, float* hCPU, float* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    float work[1];
    int64_t incx = 1;
    float alpha = -1.0f;
    int64_t size = lda * N;

    float cpu_norm = slansy_(&norm_type, &uplo, &N, hCPU, &lda, work);
    m_axpy_64(size, &alpha, hCPU, incx, hGPU, incx);

    float error = slansy_(&norm_type, &uplo, &N, hGPU, &lda, work) / cpu_norm;

    return (double)error;
}

template <>
double norm_check_symmetric(
    char norm_type, char uplo, int64_t N, int64_t lda, double* hCPU, double* hGPU)
{
    // norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly

    double work[1];
    int64_t incx = 1;
    double alpha = -1.0;
    int64_t size = lda * N;

    double cpu_norm = dlansy_(&norm_type, &uplo, &N, hCPU, &lda, work);
    m_axpy_64(size, &alpha, hCPU, incx, hGPU, incx);

    double error = dlansy_(&norm_type, &uplo, &N, hGPU, &lda, work) / cpu_norm;

    return error;
}
*/

// template<>
// double norm_check_symmetric(char norm_type, char uplo, int64_t N, int64_t lda, hipblasComplex
// *hCPU, hipblasComplex *hGPU)
//{
////norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly
//
//    float work[1];
//    int64_t incx = 1;
//    hipblasComplex alpha = -1.0f;
//    int64_t size = lda * N;
//
//    float cpu_norm = clanhe_(&norm_type, &uplo, &N, hCPU, &lda, work);
//    m_axpy_64(size, &alpha, hCPU, incx, hGPU, incx);
//
//    float error = clanhe_(&norm_type, &uplo, &N, hGPU, &lda, work)/cpu_norm;
//
//    return (double)error;
//}
//
// template<>
// double norm_check_symmetric(char norm_type, char uplo, int64_t N, int64_t lda,
// hipblasDoubleComplex *hCPU, hipblasDoubleComplex *hGPU)
//{
////norm type can be 'M', 'I', 'F', 'l': 'F' (Frobenius norm) is used mostly
//
//    double work[1];
//    int64_t incx = 1;
//    hipblasDoubleComplex alpha = -1.0;
//    int64_t size = lda * N;
//
//    double cpu_norm = zlanhe_(&norm_type, &uplo, &N, hCPU, &lda, work);
//    m_axpy_64(size, &alpha, hCPU, incx, hGPU, incx);
//
//    double error = zlanhe_(&norm_type, &uplo, &N, hGPU, &lda, work)/cpu_norm;
//
//    return error;
//}
hipBLAS-rocm-6.4.3/clients/common/unit.cpp000066400000000000000000000362131500474223100203230ustar00rootroot00000000000000/* ************************************************************************ * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * ************************************************************************ */ #include "unit.h" #include "hipblas.h" #include "host_vector.hpp" #include "utility.h" /* ========================================Gtest Unit Check * ==================================================== */ /*! 
\brief Template: gtest unit compare two matrices float/double/complex */ // This returns from the current function if an error occurs #ifndef GOOGLE_TEST #define UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, UNIT_ASSERT_EQ) #define UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, UNIT_ASSERT_EQ) #else // GOOGLE_TEST #define UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, UNIT_ASSERT_EQ) \ do \ { \ for(size_t k = 0; k < batch_count; k++) \ for(size_t j = 0; j < N; j++) \ for(size_t i = 0; i < M; i++) \ if(hipblas_isnan(hCPU[i + j * lda + k * strideA])) \ { \ ASSERT_TRUE(hipblas_isnan(hGPU[i + j * lda + k * strideA])); \ } \ else \ { \ UNIT_ASSERT_EQ(hCPU[i + j * lda + k * strideA], \ hGPU[i + j * lda + k * strideA]); \ } \ } while(0) #define UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, UNIT_ASSERT_EQ) \ do \ { \ for(size_t k = 0; k < batch_count; k++) \ for(size_t j = 0; j < N; j++) \ for(size_t i = 0; i < M; i++) \ if(hipblas_isnan(hCPU[k][i + j * lda])) \ { \ ASSERT_TRUE(hipblas_isnan(hGPU[k][i + j * lda])); \ } \ else \ { \ UNIT_ASSERT_EQ(hCPU[k][i + j * lda], hGPU[k][i + j * lda]); \ } \ } while(0) #endif // GOOGLE_TEST #define ASSERT_HALF_EQ(a, b) ASSERT_FLOAT_EQ(half_to_float(a), half_to_float(b)) #define ASSERT_BFLOAT16_EQ(a, b) ASSERT_FLOAT_EQ(bfloat16_to_float(a), bfloat16_to_float(b)) #define ASSERT_FLOAT_COMPLEX_EQ(a, b) \ do \ { \ ASSERT_FLOAT_EQ(a.real(), b.real()); \ ASSERT_FLOAT_EQ(a.imag(), b.imag()); \ } while(0) #define ASSERT_DOUBLE_COMPLEX_EQ(a, b) \ do \ { \ ASSERT_DOUBLE_EQ(a.real(), b.real()); \ ASSERT_DOUBLE_EQ(a.imag(), b.imag()); \ } while(0) template <> void unit_check_general(int64_t M, int64_t N, int64_t lda, hipblasHalf* hCPU, hipblasHalf* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_HALF_EQ); } template <> void unit_check_general( int64_t M, int64_t N, int64_t lda, hipblasBfloat16* hCPU, hipblasBfloat16* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_BFLOAT16_EQ); } template <> void 
unit_check_general(int64_t M, int64_t N, int64_t lda, float* hCPU, float* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_FLOAT_EQ); } template <> void unit_check_general(int64_t M, int64_t N, int64_t lda, double* hCPU, double* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_DOUBLE_EQ); } template <> void unit_check_general( int64_t M, int64_t N, int64_t lda, hipblasComplex* hCPU, hipblasComplex* hGPU) { #ifdef GOOGLE_TEST for(int64_t j = 0; j < N; j++) for(int64_t i = 0; i < M; i++) { ASSERT_FLOAT_EQ(hCPU[i + j * lda].real(), hGPU[i + j * lda].real()); ASSERT_FLOAT_EQ(hCPU[i + j * lda].imag(), hGPU[i + j * lda].imag()); } #endif } template <> void unit_check_general( int64_t M, int64_t N, int64_t lda, hipblasDoubleComplex* hCPU, hipblasDoubleComplex* hGPU) { #ifdef GOOGLE_TEST for(int64_t j = 0; j < N; j++) for(int64_t i = 0; i < M; i++) { ASSERT_DOUBLE_EQ(hCPU[i + j * lda].real(), hGPU[i + j * lda].real()); ASSERT_DOUBLE_EQ(hCPU[i + j * lda].imag(), hGPU[i + j * lda].imag()); } #endif } template <> void unit_check_general(int64_t M, int64_t N, int64_t lda, int* hCPU, int* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_EQ); } template <> void unit_check_general(int64_t M, int64_t N, int64_t lda, int64_t* hCPU, int64_t* hGPU) { UNIT_CHECK(M, N, 1, lda, 0, hCPU, hGPU, ASSERT_EQ); } // batched checks template <> void unit_check_general( int64_t M, int64_t N, int64_t batch_count, int64_t lda, hipblasHalf** hCPU, hipblasHalf** hGPU) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_HALF_EQ); } template <> void unit_check_general(int64_t M, int64_t N, int64_t batch_count, int64_t lda, hipblasBfloat16** hCPU, hipblasBfloat16** hGPU) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_BFLOAT16_EQ); } template <> void unit_check_general( int64_t M, int64_t N, int64_t batch_count, int64_t lda, float** hCPU, float** hGPU) { UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_FLOAT_EQ); } template <> void unit_check_general( int64_t M, 
int64_t N, int64_t batch_count, int64_t lda, double** hCPU, double** hGPU)
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_DOUBLE_EQ);
}

// NOTE(review): the explicit template arguments of the specializations below are not visible
// in this copy, so several of them read identically apart from the parameter types and the
// assertion macro. UNIT_CHECK_B / UNIT_CHECK are defined outside this view; each function
// below only forwards its arguments plus one assertion macro to them.

// Batched (array of pointers) check, int elements, compared with ASSERT_EQ.
template <>
void unit_check_general(
    int64_t M, int64_t N, int64_t batch_count, int64_t lda, int** hCPU, int** hGPU)
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_EQ);
}

// Batched (array of pointers) check, int64_t elements, compared with ASSERT_EQ.
template <>
void unit_check_general(
    int64_t M, int64_t N, int64_t batch_count, int64_t lda, int64_t** hCPU, int64_t** hGPU)
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_EQ);
}

// Batched (array of pointers) check, hipblasComplex elements.
template <>
void unit_check_general(int64_t          M,
                        int64_t          N,
                        int64_t          batch_count,
                        int64_t          lda,
                        hipblasComplex** hCPU,
                        hipblasComplex** hGPU)
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_FLOAT_COMPLEX_EQ);
}

// Batched (array of pointers) check, hipblasDoubleComplex elements.
template <>
void unit_check_general(int64_t                M,
                        int64_t                N,
                        int64_t                batch_count,
                        int64_t                lda,
                        hipblasDoubleComplex** hCPU,
                        hipblasDoubleComplex** hGPU)
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_DOUBLE_COMPLEX_EQ);
}

// batched checks for host_vector[]s
// NOTE(review): the host_vector element type is not visible in this copy; presumably it
// matches the assertion macro used in each body (half, bfloat16, integer, float, double,
// float complex, double complex) -- confirm against the header.

// host_vector[] batch compared with ASSERT_HALF_EQ.
template <>
void unit_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[])
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_HALF_EQ);
}

// host_vector[] batch compared with ASSERT_BFLOAT16_EQ.
template <>
void unit_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[])
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_BFLOAT16_EQ);
}

// host_vector[] batch compared with ASSERT_EQ.
template <>
void unit_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[])
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_EQ);
}

// host_vector[] batch compared with ASSERT_FLOAT_EQ.
template <>
void unit_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[])
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_FLOAT_EQ);
}

// host_vector[] batch compared with ASSERT_DOUBLE_EQ.
template <>
void unit_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[])
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_DOUBLE_EQ);
}

// host_vector[] batch compared with ASSERT_FLOAT_COMPLEX_EQ.
template <>
void unit_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[])
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_FLOAT_COMPLEX_EQ);
}

// host_vector[] batch compared with ASSERT_DOUBLE_COMPLEX_EQ.
template <>
void unit_check_general(int64_t     M,
                        int64_t     N,
                        int64_t     batch_count,
                        int64_t     lda,
                        host_vector hCPU[],
                        host_vector hGPU[])
{
    UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, ASSERT_DOUBLE_COMPLEX_EQ);
}

// strided_batched checks
// These overloads take a single pointer per side plus a stride (strideA) and forward to
// UNIT_CHECK instead of UNIT_CHECK_B.

// Strided-batched check, hipblasHalf elements.
template <>
void unit_check_general(int64_t       M,
                        int64_t       N,
                        int64_t       batch_count,
                        int64_t       lda,
                        hipblasStride strideA,
                        hipblasHalf*  hCPU,
                        hipblasHalf*  hGPU)
{
    UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_HALF_EQ);
}

// Strided-batched check, hipblasBfloat16 elements.
template <>
void unit_check_general(int64_t          M,
                        int64_t          N,
                        int64_t          batch_count,
                        int64_t          lda,
                        hipblasStride    strideA,
                        hipblasBfloat16* hCPU,
                        hipblasBfloat16* hGPU)
{
    UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_BFLOAT16_EQ);
}

// Strided-batched check, float elements.
template <>
void unit_check_general(int64_t       M,
                        int64_t       N,
                        int64_t       batch_count,
                        int64_t       lda,
                        hipblasStride strideA,
                        float*        hCPU,
                        float*        hGPU)
{
    UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_FLOAT_EQ);
}

// Strided-batched check, double elements.
template <>
void unit_check_general(int64_t       M,
                        int64_t       N,
                        int64_t       batch_count,
                        int64_t       lda,
                        hipblasStride strideA,
                        double*       hCPU,
                        double*       hGPU)
{
    UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_DOUBLE_EQ);
}

// Strided-batched check, hipblasComplex elements.
template <>
void unit_check_general(int64_t         M,
                        int64_t         N,
                        int64_t         batch_count,
                        int64_t         lda,
                        hipblasStride   strideA,
                        hipblasComplex* hCPU,
                        hipblasComplex* hGPU)
{
    UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_FLOAT_COMPLEX_EQ);
}

// Strided-batched check, hipblasDoubleComplex elements.
template <>
void unit_check_general(int64_t               M,
                        int64_t               N,
                        int64_t               batch_count,
                        int64_t               lda,
                        hipblasStride         strideA,
                        hipblasDoubleComplex* hCPU,
                        hipblasDoubleComplex* hGPU)
{
    UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_DOUBLE_COMPLEX_EQ);
}

// Strided-batched check, int elements.
template <>
void unit_check_general(int64_t       M,
                        int64_t       N,
                        int64_t       batch_count,
                        int64_t       lda,
                        hipblasStride strideA,
                        int*          hCPU,
                        int*          hGPU)
{
    UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_EQ);
}

// Strided-batched check, int64_t elements.
template <>
void unit_check_general(int64_t       M,
                        int64_t       N,
                        int64_t       batch_count,
                        int64_t       lda,
                        hipblasStride strideA,
                        int64_t*      hCPU,
                        int64_t*      hGPU)
{
    UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, ASSERT_EQ);
}
hipBLAS-rocm-6.4.3/clients/common/utility.cpp000066400000000000000000000356131500474223100210520ustar00rootroot00000000000000/* ************************************************************************
 * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
* * * ************************************************************************ */ #ifdef WIN32 #include // #include #endif #include "hipblas.h" #include "hipblas_test.hpp" #include "utility.h" #include #include #include #include #include #ifdef WIN32 #define strcasecmp(A, B) _stricmp(A, B) #ifdef __cpp_lib_filesystem #include namespace fs = std::filesystem; #else #include namespace fs = std::experimental::filesystem; #endif // Not WIN32 #else #include #include #endif // global for device memory padding see d_vector.hpp size_t g_DVEC_PAD = 4096; void d_vector_set_pad_length(size_t pad) { g_DVEC_PAD = pad; } hipblas_rng_t hipblas_rng(69069); hipblas_rng_t hipblas_seed(hipblas_rng); int64_t c_i32_overflow = int64_t(std::numeric_limits::max()) + 1; // 2147483648 template <> char type2char() { return 's'; } template <> char type2char() { return 'd'; } // template<> // char type2char(){ // return 'c'; // } // template<> // char type2char(){ // return 'z'; // } template <> int type2int(float val) { return (int)val; } template <> int type2int(double val) { return (int)val; } template <> int type2int(hipblasComplex val) { return (int)val.real(); } template <> int type2int(hipblasDoubleComplex val) { return (int)val.real(); } /* ============================================================================================ */ // Return path of this executable std::string hipblas_exepath() { #ifdef WIN32 std::vector result(MAX_PATH + 1); // Ensure result is large enough to accomodate the path for(;;) { auto length = GetModuleFileNameA(nullptr, result.data(), result.size()); if(length < result.size() - 1) { result.resize(length + 1); // result.shrink_to_fit(); break; } result.resize(result.size() * 2); } fs::path exepath(result.begin(), result.end()); exepath = exepath.remove_filename(); // Add trailing "/" to exepath if required exepath += exepath.empty() ? 
"" : "/"; return exepath.string(); #else std::string pathstr; char* path = realpath("/proc/self/exe", 0); if(path) { char* p = strrchr(path, '/'); if(p) { p[1] = 0; pathstr = path; } free(path); } return pathstr; #endif } /* ============================================================================================ */ // Temp directory rooted random path std::string hipblas_tempname() { #ifdef WIN32 // Generate "/tmp/hipblas-XXXXXX" like file name const std::string alphanum = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuv"; int stringlength = alphanum.length() - 1; std::string uniquestr = "hipblas-"; for(auto n : {0, 1, 2, 3, 4, 5}) uniquestr += alphanum.at(rand() % stringlength); fs::path tmpname = fs::temp_directory_path() / uniquestr; return tmpname.string(); #else char tmp[] = "/tmp/hipblas-XXXXXX"; int fd = mkostemp(tmp, O_CLOEXEC); if(fd == -1) { dprintf(STDERR_FILENO, "Cannot open temporary file: %m\n"); exit(EXIT_FAILURE); } return std::string(tmp); #endif } /***************** * local handles * *****************/ hipblasLocalHandle::hipblasLocalHandle() { auto status = hipblasCreate(&m_handle); if(status != HIPBLAS_STATUS_SUCCESS) throw std::runtime_error(hipblasStatusToString(status)); } hipblasLocalHandle::hipblasLocalHandle(const Arguments& arg) : hipblasLocalHandle() { hipblasAtomicsMode_t mode; auto status = hipblasGetAtomicsMode(m_handle, &mode); if(status != HIPBLAS_STATUS_SUCCESS) throw std::runtime_error(hipblasStatusToString(status)); if(mode != hipblasAtomicsMode_t(arg.atomics_mode)) status = hipblasSetAtomicsMode(m_handle, hipblasAtomicsMode_t(arg.atomics_mode)); if(status == HIPBLAS_STATUS_SUCCESS) { /* // If the test specifies user allocated workspace, allocate and use it if(arg.user_allocated_workspace) { if((hipMalloc)(&m_memory, arg.user_allocated_workspace) != hipSuccess) throw std::bad_alloc(); status = rocblas_set_workspace(m_handle, m_memory, arg.user_allocated_workspace); } */ } else { throw 
std::runtime_error(hipblasStatusToString(status)); } // memory guard control, with multi-threading should not change values across threads d_vector_set_pad_length(arg.pad); } hipblasLocalHandle::~hipblasLocalHandle() { if(m_memory) { // m_memory never used currently auto hipStatus = hipFree(m_memory); if(hipStatus != hipSuccess) { std::cerr << "error freeing hip memory in hipblasLocalHandle: " << hipGetErrorString(hipStatus) << "\n"; } } hipblasStatus_t status = hipblasDestroy(m_handle); if(status != HIPBLAS_STATUS_SUCCESS) { std::cerr << "hipblasDestroy error: " << hipblasStatusToString(status) << "\n"; #ifdef GOOGLE_TEST EXPECT_EQ(status, HIPBLAS_STATUS_SUCCESS); #endif } } /******************************************************************************* * \brief convert hipError_t to hipblasStatus_t * TODO - enumerate library calls to hip runtime, enumerate possible errors from those calls ******************************************************************************/ hipblasStatus_t hipblas_internal_convert_hip_to_hipblas_status(hipError_t status) { switch(status) { // success case hipSuccess: return HIPBLAS_STATUS_SUCCESS; // internal hip memory allocation case hipErrorMemoryAllocation: case hipErrorLaunchOutOfResources: return HIPBLAS_STATUS_MAPPING_ERROR; // user-allocated hip memory case hipErrorInvalidDevicePointer: // hip memory return HIPBLAS_STATUS_MAPPING_ERROR; // user-allocated device, stream, event case hipErrorInvalidDevice: case hipErrorInvalidResourceHandle: return HIPBLAS_STATUS_HANDLE_IS_NULLPTR; // library using hip incorrectly case hipErrorInvalidValue: return HIPBLAS_STATUS_INTERNAL_ERROR; // hip catch find matching ISA case hipErrorNoBinaryForGpu: return HIPBLAS_STATUS_ARCH_MISMATCH; // hip runtime failing case hipErrorNoDevice: // no hip devices case hipErrorUnknown: default: return HIPBLAS_STATUS_UNKNOWN; } } hipblasStatus_t hipblas_internal_convert_hip_to_hipblas_status_and_log(hipError_t status) { hipblasStatus_t lib_status = 
hipblas_internal_convert_hip_to_hipblas_status(status); std::cerr << "hipBLAS error from hip error code: '" << hipGetErrorName(status) << "':" << status << std::endl; return lib_status; } std::string getArchString() { int device; auto hipStatus = hipGetDevice(&device); if(hipStatus != hipSuccess) { std::cerr << "Error with hipGetDevice: " << hipGetErrorString(hipStatus); return ""; } hipDeviceProp_t deviceProperties; hipStatus = hipGetDeviceProperties(&deviceProperties, device); if(hipStatus != hipSuccess) { std::cerr << "Error with hipGetDeviceProperties: " << hipGetErrorString(hipStatus); return ""; } // strip out xnack/ecc from name std::string deviceFullString(deviceProperties.gcnArchName); std::string deviceString = deviceFullString.substr(0, deviceFullString.find(":")); return deviceString; } #ifdef __cplusplus extern "C" { #endif /* ============================================================================================ */ /* timing:*/ /*! \brief CPU Timer(in microsecond): synchronize with the default device and return wall time */ double get_time_us(void) { (void)hipDeviceSynchronize(); auto now = std::chrono::steady_clock::now(); // now.time_since_epoch() is the dureation since epogh // which is converted to microseconds auto duration = std::chrono::duration_cast(now.time_since_epoch()).count(); return (static_cast(duration)); }; /*! \brief CPU Timer(in microsecond): synchronize with given queue/stream and return wall time */ double get_time_us_sync(hipStream_t stream) { (void)hipStreamSynchronize(stream); auto now = std::chrono::steady_clock::now(); // now.time_since_epoch() is the dureation since epogh // which is converted to microseconds auto duration = std::chrono::duration_cast(now.time_since_epoch()).count(); return (static_cast(duration)); }; /* ============================================================================================ */ /* device query and print out their ID and name; return number of compute-capable devices. 
*/ int query_device_property() { int device_count; hipblasStatus_t status = (hipblasStatus_t)hipGetDeviceCount(&device_count); if(status != HIPBLAS_STATUS_SUCCESS) { printf("Query device error: cannot get device count \n"); return -1; } else { printf("Query device success: there are %d devices \n", device_count); } for(int i = 0; i < device_count; i++) { hipDeviceProp_t props; hipblasStatus_t status = (hipblasStatus_t)hipGetDeviceProperties(&props, i); if(status != HIPBLAS_STATUS_SUCCESS) { printf("Query device error: cannot get device ID %d's property\n", i); } else { printf("Device ID %d : %s ------------------------------------------------------\n", i, props.name); printf("with %3.1f GB memory, clock rate %dMHz @ computing capability %d.%d \n", props.totalGlobalMem / 1e9, (int)(props.clockRate / 1000), props.major, props.minor); printf( "maxGridDimX %d, sharedMemPerBlock %3.1f KB, maxThreadsPerBlock %d, warpSize %d\n", props.maxGridSize[0], props.sharedMemPerBlock / 1e3, props.maxThreadsPerBlock, props.warpSize); printf("-------------------------------------------------------------------------\n"); } } return device_count; } /* set current device to device_id */ void set_device(int device_id) { hipblasStatus_t status = (hipblasStatus_t)hipSetDevice(device_id); if(status != HIPBLAS_STATUS_SUCCESS) { printf("Set device error: cannot set device ID %d, there may not be such device ID\n", (int)device_id); } } /******************************************************************************* * GPU architecture-related functions ******************************************************************************/ hipblasClientProcessor getArch() { int device; auto hipStatus = hipGetDevice(&device); if(hipStatus != hipSuccess) { std::cerr << "Error with hipGetDevice: " << hipGetErrorString(hipStatus); return static_cast(0); } hipDeviceProp_t deviceProperties; hipStatus = hipGetDeviceProperties(&deviceProperties, device); if(hipStatus != hipSuccess) { std::cerr << "Error with 
hipGetDeviceProperties: " << hipGetErrorString(hipStatus); return static_cast(0); } // strip out xnack/ecc from name std::string deviceFullString(deviceProperties.gcnArchName); std::string deviceString = deviceFullString.substr(0, deviceFullString.find(":")); if(deviceString.find("gfx803") != std::string::npos) { return hipblasClientProcessor::gfx803; } else if(deviceString.find("gfx900") != std::string::npos) { return hipblasClientProcessor::gfx900; } else if(deviceString.find("gfx906") != std::string::npos) { return hipblasClientProcessor::gfx906; } else if(deviceString.find("gfx908") != std::string::npos) { return hipblasClientProcessor::gfx908; } else if(deviceString.find("gfx90a") != std::string::npos) { return hipblasClientProcessor::gfx90a; } else if(deviceString.find("gfx940") != std::string::npos) { return hipblasClientProcessor::gfx940; } else if(deviceString.find("gfx941") != std::string::npos) { return hipblasClientProcessor::gfx941; } else if(deviceString.find("gfx942") != std::string::npos) { return hipblasClientProcessor::gfx942; } else if(deviceString.find("gfx1010") != std::string::npos) { return hipblasClientProcessor::gfx1010; } else if(deviceString.find("gfx1011") != std::string::npos) { return hipblasClientProcessor::gfx1011; } else if(deviceString.find("gfx1012") != std::string::npos) { return hipblasClientProcessor::gfx1012; } else if(deviceString.find("gfx1030") != std::string::npos) { return hipblasClientProcessor::gfx1030; } else if(deviceString.find("gfx1100") != std::string::npos) { return hipblasClientProcessor::gfx1100; } else if(deviceString.find("gfx1101") != std::string::npos) { return hipblasClientProcessor::gfx1101; } else if(deviceString.find("gfx1102") != std::string::npos) { return hipblasClientProcessor::gfx1102; } return static_cast(0); } int getArchMajor() { return static_cast(getArch()) / 100; } #ifdef __cplusplus } #endif 
hipBLAS-rocm-6.4.3/clients/gtest/000077500000000000000000000000001500474223100164715ustar00rootroot00000000000000hipBLAS-rocm-6.4.3/clients/gtest/CMakeLists.txt000066400000000000000000000277131500474223100212430ustar00rootroot00000000000000# ########################################################################
# Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
# ies of the Software, and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
# PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
# CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# ########################################################################

# Threading library, preferring the -pthread flag
set( THREADS_PREFER_PTHREAD_FLAG ON )
find_package( Threads REQUIRED )

# Linking lapack library requires fortran flags
if(NOT WIN32)
  enable_language( Fortran )
endif()

# Use the in-tree hipblas target when it exists, otherwise locate an installed package
if( NOT TARGET hipblas )
  find_package( hipblas REQUIRED CONFIG PATHS /opt/rocm/hipblas )
endif( )

# Prefer a GoogleTest built into the local deps directory when its static library is present
if(EXISTS "${BUILD_DIR}/deps/deps-install/lib/libgtest.a")
  set( GTEST_ROOT "${BUILD_DIR}/deps/deps-install")
endif()
find_package( GTest REQUIRED )

# Test sources shared by both test executables
set(hipblas_test_source
  hipblas_gtest_main.cpp
  hipblas_test.cpp
  auxil/auxiliary_gtest.cpp
  auxil/set_get_mode_gtest.cpp
  auxil/set_get_matrix_vector_gtest.cpp
  blas1/asum_gtest.cpp
  blas1/axpy_gtest.cpp
  blas1/copy_gtest.cpp
  blas1/dot_gtest.cpp
  blas1/iamaxmin_gtest.cpp
  blas1/nrm2_gtest.cpp
  blas1/rot_gtest.cpp
  blas1/scal_gtest.cpp
  blas1/swap_gtest.cpp
  blas2/gbmv_gtest.cpp
  blas2/gemv_gtest.cpp
  blas2/ger_gtest.cpp
  blas2/hbmv_gtest.cpp
  blas2/hemv_gtest.cpp
  blas2/her_gtest.cpp
  blas2/her2_gtest.cpp
  blas2/hpmv_gtest.cpp
  blas2/hpr_gtest.cpp
  blas2/hpr2_gtest.cpp
  blas2/sbmv_gtest.cpp
  blas2/spmv_gtest.cpp
  blas2/spr_gtest.cpp
  blas2/spr2_gtest.cpp
  blas2/symv_gtest.cpp
  blas2/syr_gtest.cpp
  blas2/syr2_gtest.cpp
  blas2/tbmv_gtest.cpp
  blas2/tbsv_gtest.cpp
  blas2/tpmv_gtest.cpp
  blas2/tpsv_gtest.cpp
  blas2/trmv_gtest.cpp
  blas2/trsv_gtest.cpp
  blas3/dgmm_gtest.cpp
  blas3/gemm_gtest.cpp
  blas3/hemm_gtest.cpp
  blas3/geam_gtest.cpp
  blas3/herk_gtest.cpp
  blas3/her2k_gtest.cpp
  blas3/herkx_gtest.cpp
  blas3/symm_gtest.cpp
  blas3/syrk_gtest.cpp
  blas3/syr2k_gtest.cpp
  blas3/syrkx_gtest.cpp
  blas3/trsm_gtest.cpp
  blas3/trmm_gtest.cpp
  blas3/trtri_gtest.cpp
  blas_ex/axpy_ex_gtest.cpp
  blas_ex/dot_ex_gtest.cpp
  blas_ex/nrm2_ex_gtest.cpp
  blas_ex/rot_ex_gtest.cpp
  blas_ex/scal_ex_gtest.cpp
  blas_ex/trsm_ex_gtest.cpp
  blas_ex/gemm_ex_gtest.cpp
  )

# Solver tests are only compiled when BUILD_WITH_SOLVER is set; otherwise the variable
# stays undefined and expands to nothing in add_executable below
if( BUILD_WITH_SOLVER )
  set( hipblas_solver_test_source
    solver/getrf_gtest.cpp
    solver/getrs_gtest.cpp
    solver/getri_gtest.cpp
    solver/geqrf_gtest.cpp
    solver/gels_gtest.cpp
    )
endif( )

# Client helper sources from ../common compiled into each test executable
set( hipblas_test_common
  ../common/utility.cpp
  ../common/cblas_interface.cpp
  ../common/clients_common.cpp
  ../common/norm.cpp
  ../common/unit.cpp
  ../common/near.cpp
  ../common/arg_check.cpp
  ../common/argument_model.cpp
  ../common/hipblas_arguments.cpp
  ../common/hipblas_parse_data.cpp
  ../common/hipblas_datatype2string.cpp
  ../common/hipblas_template_specialization.cpp
  ../common/host_alloc.cpp
  ${BLIS_CPP}
  )

# Two executables are built from the same source lists: hipblas-test and hipblas_v2-test
add_executable( hipblas-test ${hipblas_f90_source} ${hipblas_test_source} ${hipblas_solver_test_source} ${hipblas_test_common} )
add_executable( hipblas_v2-test ${hipblas_f90_source} ${hipblas_test_source} ${hipblas_solver_test_source} ${hipblas_test_common} )

# NOTE(review): the generator expressions in this file appear truncated to a bare "$"
# (or "$<$:...>") in this copy; the directories/conditions they named are not visible here.
target_include_directories( hipblas-test
  PRIVATE
    $
  )
target_include_directories( hipblas_v2-test
  PRIVATE
    $
  )

target_compile_definitions( hipblas-test PRIVATE GOOGLE_TEST ${BLIS_DEFINES} )
target_compile_definitions( hipblas_v2-test PRIVATE GOOGLE_TEST ${BLIS_DEFINES} )

# External header includes included as SYSTEM files
target_include_directories( hipblas-test
  SYSTEM PRIVATE
    $
    $
    $
    $
    $
    $
    ${ROCM_PATH}/include
  )
target_include_directories( hipblas_v2-test
  SYSTEM PRIVATE
    $
    $
    $
    $
    $
    $
    ${ROCM_PATH}/include
  )

target_link_libraries( hipblas-test PRIVATE roc::hipblas GTest::GTest )
target_link_libraries( hipblas_v2-test PRIVATE roc::hipblas GTest::GTest )

# The Fortran client library is linked everywhere except Windows
if (NOT WIN32)
  target_link_libraries( hipblas-test PRIVATE hipblas_fortran_client )
  target_link_libraries( hipblas_v2-test PRIVATE hipblas_fortran_client )
endif()

# need mf16c flag for float->half conversion
target_compile_options( hipblas-test PRIVATE -mf16c ) # -Wno-deprecated-declarations )
target_compile_options( hipblas_v2-test PRIVATE -mf16c ) # -Wno-deprecated-declarations )

target_compile_options(hipblas-test PRIVATE $<$:${COMMON_CXX_OPTIONS}>)
target_compile_options(hipblas_v2-test PRIVATE $<$:${COMMON_CXX_OPTIONS}>)

# Only the v2 executable additionally defines HIPBLAS_V2
target_compile_definitions( hipblas-test PRIVATE ${COMMON_DEFINES} )
target_compile_definitions( hipblas_v2-test PRIVATE ${COMMON_DEFINES} HIPBLAS_V2 )
# BLAS reference library and common link libraries for both test executables
target_link_libraries( hipblas-test PRIVATE ${BLAS_LIBRARY} ${COMMON_LINK_LIBS} )
target_link_libraries( hipblas_v2-test PRIVATE ${BLAS_LIBRARY} ${COMMON_LINK_LIBS} )

# Filesystem support library is linked explicitly on non-Windows builds
if (NOT WIN32)
  target_link_libraries( hipblas-test PRIVATE stdc++fs )
  target_link_libraries( hipblas_v2-test PRIVATE stdc++fs )
endif()

# Platform runtime: HIP host (plus an optional custom target) on amd, CUDA otherwise
if(HIP_PLATFORM STREQUAL amd)
  target_link_libraries( hipblas-test PRIVATE hip::host )
  target_link_libraries( hipblas_v2-test PRIVATE hip::host )

  if( CUSTOM_TARGET )
    target_link_libraries( hipblas-test PRIVATE hip::${CUSTOM_TARGET} )
    target_link_libraries( hipblas_v2-test PRIVATE hip::${CUSTOM_TARGET} )
  endif( )

else( )
  # NOTE(review): the generator expression below appears truncated to a bare "$" in this
  # copy; the include directory it named is not visible here.
  target_include_directories( hipblas-test
    PRIVATE
      $
    )
  target_include_directories( hipblas_v2-test
    PRIVATE
      $
    )

  target_link_libraries( hipblas-test PRIVATE ${CUDA_LIBRARIES} )
  target_link_libraries( hipblas_v2-test PRIVATE ${CUDA_LIBRARIES} )
endif( )

if (WIN32)
  # for now adding in all .dll as dependency chain is not cmake based on win32
  file( GLOB third_party_dlls
    LIST_DIRECTORIES OFF
    CONFIGURE_DEPENDS
    ${LAPACK_DIR}/bin/*.dll
    ${BLIS_DIR}/lib/*.dll
    ${OPENBLAS_DIR}/bin/*.dll
    ${HIP_DIR}/bin/amd*.dll
    ${HIP_DIR}/bin/hiprt*.dll
    ${HIP_DIR}/bin/hipinfo.exe
    ${ROCBLAS_PATH}/bin/rocblas*.dll
    ${ROCSOLVER_PATH}/bin/rocsolver*.dll
    ${CMAKE_SOURCE_DIR}/rtest.*
    C:/Windows/System32/libomp140*.dll
  )
  # Copy every matched file next to the test binary after hipblas-test is built
  foreach( file_i ${third_party_dlls})
    add_custom_command( TARGET hipblas-test POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy ${file_i} ${PROJECT_BINARY_DIR}/staging/ )
  endforeach( file_i )

  add_custom_command( TARGET hipblas-test POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy_directory ${ROCBLAS_PATH}/bin/rocblas/library/ ${PROJECT_BINARY_DIR}/staging/library/)
endif()

# C++17 without compiler extensions; binaries are placed in <build>/staging.
# The property name is CXX_STANDARD_REQUIRED: the previous spelling (CXX_STANDARED_REQUIRED)
# set an unknown property, so the C++17 requirement was never enforced.
set_target_properties( hipblas-test PROPERTIES
  CXX_STANDARD 17
  CXX_STANDARD_REQUIRED ON
  CXX_EXTENSIONS OFF
  IMPORT_PREFIX ""
  IMPORT_SUFFIX ".lib"
  LINKER_LANGUAGE CXX
  RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging"
)

set_target_properties( hipblas_v2-test PROPERTIES
  CXX_STANDARD 17
  CXX_STANDARD_REQUIRED ON
  CXX_EXTENSIONS OFF
  IMPORT_PREFIX ""
  IMPORT_SUFFIX ".lib"
  LINKER_LANGUAGE CXX
  RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging"
)

# Generated test-data files, one per executable
set( HIPBLAS_TEST_DATA "${PROJECT_BINARY_DIR}/staging/hipblas_gtest.data")
set( HIPBLAS_V2_TEST_DATA "${PROJECT_BINARY_DIR}/staging/hipblas_v2_gtest.data")

# YAML inputs, grouped by area; listed as dependencies of the generated data files
set( HIPBLAS_AUX_YAML_DATA
  auxil/set_get_matrix_vector_gtest.yaml
  auxil/set_get_mode_gtest.yaml
  )

set( HIPBLAS_L1_YAML_DATA
  blas1/asum_gtest.yaml
  blas1/axpy_gtest.yaml
  blas1/copy_gtest.yaml
  blas1/dot_gtest.yaml
  blas1/iamaxmin_gtest.yaml
  blas1/nrm2_gtest.yaml
  blas1/rot_gtest.yaml
  blas1/scal_gtest.yaml
  blas1/swap_gtest.yaml
  )

set( HIPBLAS_L2_YAML_DATA
  blas2/gbmv_gtest.yaml
  blas2/gemv_gtest.yaml
  blas2/ger_gtest.yaml
  blas2/hbmv_gtest.yaml
  blas2/hemv_gtest.yaml
  blas2/her_gtest.yaml
  blas2/her2_gtest.yaml
  blas2/hpmv_gtest.yaml
  blas2/hpr_gtest.yaml
  blas2/hpr2_gtest.yaml
  blas2/sbmv_gtest.yaml
  blas2/spmv_gtest.yaml
  blas2/spr_gtest.yaml
  blas2/spr2_gtest.yaml
  blas2/symv_gtest.yaml
  blas2/syr_gtest.yaml
  blas2/syr2_gtest.yaml
  blas2/tbmv_gtest.yaml
  blas2/tbsv_gtest.yaml
  blas2/tpmv_gtest.yaml
  blas2/tpsv_gtest.yaml
  blas2/trmv_gtest.yaml
  blas2/trsv_gtest.yaml
  )

set( HIPBLAS_L3_YAML_DATA
  blas3/dgmm_gtest.yaml
  blas3/geam_gtest.yaml
  blas3/gemm_gtest.yaml
  blas3/hemm_gtest.yaml
  blas3/herk_gtest.yaml
  blas3/her2k_gtest.yaml
  blas3/herkx_gtest.yaml
  blas3/symm_gtest.yaml
  blas3/syrk_gtest.yaml
  blas3/syr2k_gtest.yaml
  blas3/syrkx_gtest.yaml
  blas3/trmm_gtest.yaml
  blas3/trsm_gtest.yaml
  blas3/trtri_gtest.yaml
  )

set( HIPBLAS_EX_YAML_DATA
  blas_ex/axpy_ex_gtest.yaml
  blas_ex/dot_ex_gtest.yaml
  blas_ex/nrm2_ex_gtest.yaml
  blas_ex/rot_ex_gtest.yaml
  blas_ex/scal_ex_gtest.yaml
  blas_ex/gemm_ex_gtest.yaml
  blas_ex/trsm_ex_gtest.yaml
  )

if( BUILD_WITH_SOLVER )
  set( HIPBLAS_SOLVER_YAML_DATA
    solver/gels_gtest.yaml
    solver/geqrf_gtest.yaml
    solver/getrf_gtest.yaml
    solver/getri_gtest.yaml
    solver/getrs_gtest.yaml
    )
endif()

# Generate the data file for hipblas-test from hipblas_gtest.yaml; the command runs in this
# source directory, which is why the script and include paths are relative
add_custom_command( OUTPUT "${HIPBLAS_TEST_DATA}"
                    COMMAND ${python} ../common/hipblas_gentest.py -I ../include hipblas_gtest.yaml -o "${HIPBLAS_TEST_DATA}"
                    DEPENDS ../common/hipblas_gentest.py ../include/hipblas_common.yaml "${HIPBLAS_AUX_YAML_DATA}" "${HIPBLAS_L1_YAML_DATA}" "${HIPBLAS_L2_YAML_DATA}" "${HIPBLAS_L3_YAML_DATA}" "${HIPBLAS_EX_YAML_DATA}" "${HIPBLAS_SOLVER_YAML_DATA}" hipblas_gtest.yaml
                    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" )

# Same generation for hipblas_v2-test, with the additional --hipblas_v2 switch
add_custom_command( OUTPUT "${HIPBLAS_V2_TEST_DATA}"
                    COMMAND ${python} ../common/hipblas_gentest.py -I ../include hipblas_gtest.yaml -o "${HIPBLAS_V2_TEST_DATA}" --hipblas_v2
                    DEPENDS ../common/hipblas_gentest.py ../include/hipblas_common.yaml "${HIPBLAS_AUX_YAML_DATA}" "${HIPBLAS_L1_YAML_DATA}" "${HIPBLAS_L2_YAML_DATA}" "${HIPBLAS_L3_YAML_DATA}" "${HIPBLAS_EX_YAML_DATA}" "${HIPBLAS_SOLVER_YAML_DATA}" hipblas_gtest.yaml
                    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" )

add_custom_target( hipblas-test-data
                   DEPENDS "${HIPBLAS_TEST_DATA}" )
add_custom_target( hipblas_v2-test-data
                   DEPENDS "${HIPBLAS_V2_TEST_DATA}" )

# Each executable depends on its generated data file and on the hipblas-common target
add_dependencies( hipblas-test hipblas-test-data hipblas-common )
add_dependencies( hipblas_v2-test hipblas_v2-test-data hipblas-common )

# Install both executables and their data files in the "tests" component
rocm_install(TARGETS hipblas-test COMPONENT tests)
rocm_install(TARGETS hipblas_v2-test COMPONENT tests)
rocm_install(FILES ${HIPBLAS_TEST_DATA} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT tests)
rocm_install(FILES ${HIPBLAS_V2_TEST_DATA} DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT tests)
hipBLAS-rocm-6.4.3/clients/gtest/auxil/000077500000000000000000000000001500474223100176135ustar00rootroot00000000000000hipBLAS-rocm-6.4.3/clients/gtest/auxil/auxiliary_gtest.cpp000066400000000000000000000031371500474223100235400ustar00rootroot00000000000000/* ************************************************************************
 * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved.
*
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * ************************************************************************ */

// NOTE(review): the three angle-bracket include directives below have no visible header
// name in this copy; the test body uses strcmp and the gtest TEST/EXPECT_EQ macros, so the
// original headers presumably provide those -- confirm against upstream.
#include "utility.h"
#include
#include
#include

namespace
{
    // Checks that hipblasStatusToString(HIPBLAS_STATUS_ALLOC_FAILED) returns exactly the
    // enumerator's own name (strcmp result 0).
    TEST(hipblas_auxiliary, statusToString)
    {
        EXPECT_EQ(0,
                  strcmp("HIPBLAS_STATUS_ALLOC_FAILED",
                         hipblasStatusToString(HIPBLAS_STATUS_ALLOC_FAILED)));
    }

} // namespace
hipBLAS-rocm-6.4.3/clients/gtest/auxil/set_get_matrix_vector_gtest.cpp000066400000000000000000000141731500474223100261330ustar00rootroot00000000000000/* ************************************************************************
 * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- * ies of the Software, and to permit persons to whom the Software is furnished * to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * ************************************************************************ */ #include "auxil/testing_set_get_matrix.hpp" #include "auxil/testing_set_get_matrix_async.hpp" #include "auxil/testing_set_get_vector.hpp" #include "auxil/testing_set_get_vector_async.hpp" #include "hipblas_data.hpp" #include "hipblas_test.hpp" #include "type_dispatch.hpp" namespace { // possible aux test cases enum aux_test_type { SG_MATRIX, SG_MATRIX_ASYNC, SG_VECTOR, SG_VECTOR_ASYNC }; // aux test template template