# File rocblas.spec of Package rocblas

# Multibuild flavor detection; flavors are only used on SUSE.
# NOTE(review): @BUILD_FLAVOR@ is presumably substituted by the build service
# with the multibuild flavor name -- confirm against the project setup.
%if 0%{?suse_version}
%global flavor @BUILD_FLAVOR@%{?nil}
%if "%{?flavor}" != ""
# _flavor: the raw flavor string; only defined when the flavor is non-empty.
%define _flavor %{flavor}
# __flavor: display form of the flavor. The first "_minus" becomes "-", the
# first "_plus" becomes "+" and the first remaining "_" becomes ":".
%define __flavor %(a=%flavor; a=${a/_minus/-}; a=${a/_plus/+}; echo ${a/_/:})
%endif
%define use_flavors 1
%else
%define use_flavors 0
%endif

# Use ninja if it is available
# Ninja is available on SUSE, but OBS times out with a ninja build; make doesn't.
# Hence ninja is on by default only on Fedora and off by default elsewhere.
%if 0%{?fedora}
%bcond_without ninja
%else
%bcond_with ninja
%endif

# NOTE(review): rocm_set_libpackage_name is defined outside this file; it
# presumably defines rocblas_name (used by the Name tag) from the base name
# "rocblas" and the soname version 4 -- confirm against the rocm macro package.
%{rocm_set_libpackage_name rocblas -s 4}

# build_type is handed to CMAKE_BUILD_TYPE: DEBUG when the debug conditional
# is enabled (it is off by default), RELEASE otherwise.
%bcond_with debug
%global build_type %{?with_debug:DEBUG}%{!?with_debug:RELEASE}

# build_compress is handed to BUILD_OFFLOAD_COMPRESS: ON unless the compress
# conditional (on by default) is disabled.
%bcond_without compress
%global build_compress %{?with_compress:ON}%{!?with_compress:OFF}

# Build the client tests and benchmarks (on by default); build_test is handed
# to BUILD_CLIENTS_TESTS and BUILD_CLIENTS_BENCHMARKS.
%bcond_without test
%if %{with test}
%global build_test ON
# Blank out __brp_check_rpaths for test builds.
# NOTE(review): presumably to skip the rpath check on the test binaries -- confirm.
%global __brp_check_rpaths %{nil}
%else
%global build_test OFF
%endif

# Option to run the test suite on real HW during the check stage:
# May have to set gpu under test with
# export HIP_VISIBLE_DEVICES=<num> - 0, 1 etc.
%bcond_without check

# Tensile in 6.4 does not support generics
# https://github.com/ROCm/Tensile/issues/2124
# NOTE(review): presumably this is why the generic conditional further down
# defaults to off; tensile itself stays on by default.
# build_tensile is handed to BUILD_WITH_TENSILE in the regular (non-gfx950) build.
%bcond_without tensile
%if %{with tensile}
%global build_tensile ON
%else
%global build_tensile OFF
%endif
# msgpack is off by default on RHEL and for sle_version above 160000, and on
# by default everywhere else. In this file it gates the msgpack BuildRequires.
%if 0%{?rhel} || 0%{?sle_version} > 160000
%bcond_with msgpack
%else
%bcond_without msgpack
%endif

# Upstream project name and the ROCm release being packaged;
# rocm_version is "<rocm_release>.<rocm_patch>" and becomes the package Version.
%global upstreamname rocBLAS
%global rocm_release 6.4
%global rocm_patch 0
%global rocm_version %{rocm_release}.%{rocm_patch}

%global toolchain rocm
# hipcc does not support some clang flags
# Take the distribution optflags and prefix -fstack-protector-strong and
# -fcf-protection with -Xarch_host.
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/')

%if 0%{?rhel} && 0%{?rhel} < 10
# On CS9: /usr/bin/debugedit: Cannot handle 8-byte build ID
# so the debug package is disabled on RHEL releases before 10.
%global debug_package %{nil}
%endif

# Compression type and level for source/binary package payloads.
#  "w7T0.xzdio"	xz level 7 using %%{getncpus} threads
%global _source_payload w7T0.xzdio
%global _binary_payload w7T0.xzdio

# SUSE/OBS times out because -O is added to the make args.
# That option accumulates all the output from the long running tensile
# jobs, so the output sync macro is blanked out here.
%global _make_output_sync %{nil}

# OracleLinux 9 has a problem with its strip not recognizing *.co's
%global __strip %rocmllvm_bindir/llvm-strip


# Common cmake arguments shared by the gfx950 and the regular configure step
# in the build section.
# CORES and TP are shell variables that the build section assigns before this
# macro is expanded (TP only when tensile is enabled).
# NOTE(review): tensile_library_format and tensile_verbose are not defined in
# this file; presumably rocm_set_tensile provides them -- confirm.
%define cmake_config \\\
  -DCMAKE_CXX_COMPILER=hipcc \\\
  -DCMAKE_C_COMPILER=hipcc \\\
  -DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \\\
  -DCMAKE_AR=%rocmllvm_bindir/llvm-ar \\\
  -DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \\\
  -DCMAKE_BUILD_TYPE=%{build_type} \\\
  -DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \\\
  -DCMAKE_SKIP_RPATH=ON \\\
  -DCMAKE_VERBOSE_MAKEFILE=ON \\\
  -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \\\
  -DROCM_SYMLINK_LIBS=OFF \\\
  -DHIP_PLATFORM=amd \\\
  -DBUILD_CLIENTS_BENCHMARKS=%{build_test} \\\
  -DBUILD_CLIENTS_TESTS=%{build_test} \\\
  -DBUILD_CLIENTS_TESTS_OPENMP=OFF \\\
  -DBUILD_FORTRAN_CLIENTS=OFF \\\
  -DBLAS_LIBRARY=cblas \\\
  -DBUILD_OFFLOAD_COMPRESS=%{build_compress} \\\
  -DBUILD_WITH_HIPBLASLT=OFF \\\
  -DTensile_COMPILER=hipcc \\\
  -DTensile_CPU_THREADS=${CORES} \\\
  -DTensile_LIBRARY_FORMAT=%{tensile_library_format} \\\
  -DTensile_VERBOSE=%{tensile_verbose} \\\
  -DTensile_DIR=${TP}/cmake \\\
  -DDISABLE_ROCTRACER=ON \\\
  -DBUILD_WITH_PIP=OFF

# GPU target selection (gpu_list is handed to GPU_TARGETS in the regular build):
#  - no flavor, or flavor "all": the generic list below when the generic
#    conditional is enabled (off by default), otherwise rocm_gpu_list_default,
#    which is defined outside this file
#  - any other flavor: the flavor string itself is the target
%bcond_with generic
%global rocm_gpu_list_generic "gfx9-generic;gfx9-4-generic;gfx10-1-generic;gfx10-3-generic;gfx11-generic;gfx12-generic"
%if 0%{!?_flavor:1} || "%{?_flavor}" == "all"
%if %{with generic}
%global gpu_list %{rocm_gpu_list_generic}
%else
%global gpu_list %{rocm_gpu_list_default}
%endif
%else
%global gpu_list %_flavor
%endif

# gfx950 is an experimental target
# Enabling it short-circuits the normal build: only the gfx950 packages are
# configured and packaged.
# There is no check support.
# To use the result, do
# $ module load rocm/gfx950
#     <do stuff>
# $ module purge
%bcond_with gfx950

# The package name comes from rocblas_name, which is not defined in this file.
# NOTE(review): presumably set by the rocm_set_libpackage_name call near the
# top of the spec -- confirm.
Name:           %{rocblas_name}
Version:        %{rocm_version}
Release:        8%{?dist}
Summary:        BLAS implementation for ROCm
Url:            https://github.com/ROCmSoftwarePlatform/%{upstreamname}
License:        MIT AND BSD-3-Clause

# The part after "#/" is the file name the release tarball is stored under.
Source0:        %{url}/archive/refs/tags/rocm-%{rocm_version}.tar.gz#/%{upstreamname}-%{rocm_version}.tar.gz
Source1:        rocblas.rpmlintrc
# Patch numbers are not contiguous (2, 3, 4, 6); all of them are applied with
# -p1 by the autosetup call in the prep section.
Patch2:         0001-fixup-install-of-tensile-output.patch
Patch3:         Modify-CMakeLists.txt-files-to-allow-to-build-modules-independently.patch
Patch4:         0001-offload-compress-option.patch
Patch6:         0001-option-to-disable-roctracer-logging.patch

# Toolchain, ROCm libraries and the ROCm rpm macro packages
BuildRequires:  cmake
BuildRequires:  gcc-c++
BuildRequires:  rocm-cmake
BuildRequires:  rocm-comgr-devel
BuildRequires:  rocm-compilersupport-macros
BuildRequires:  rocm-hip-devel
BuildRequires:  rocm-runtime-devel
BuildRequires:  rocm-rpm-macros
BuildRequires:  rocm-rpm-macros-modules
# Only needed when the ninja conditional is on; the package name differs on SUSE
%if 0%{?suse_version}
%{?with_ninja:BuildRequires:  ninja}
%else
%{?with_ninja:BuildRequires:  ninja-build}
%endif

# Tensile generator, plus the msgpack headers when that conditional is on
%if %{with tensile}
%if 0%{?suse_version}
BuildRequires:  python3-tensile-devel
BuildRequires:  python3-joblib
%else
BuildRequires:  python3dist(tensile)
%endif # suse_version
%if %{with msgpack}
%if 0%{?suse_version}
BuildRequires:  msgpack-cxx-devel
%else
BuildRequires:  msgpack-devel
%endif #suse_version
%endif # msgpack
%endif # tensile
# NOTE(review): rocm_set_tensile is defined outside this file; presumably it
# provides the tensile_* macros used by cmake_config -- confirm.
%rocm_set_tensile

# zstd is only needed for the offload compression option
%if %{with compress}
BuildRequires:  pkgconfig(libzstd)
%endif

# Dependencies of the client tests and benchmarks
%if %{with test}

%if 0%{?suse_version}
BuildRequires: %{python_module PyYAML}
%else
BuildRequires:  python3dist(pyyaml)
%endif
BuildRequires:  blas-devel
BuildRequires:  libomp-devel
BuildRequires:  rocminfo
BuildRequires:  rocm-smi-devel
BuildRequires:  roctracer-devel

%if 0%{?suse_version}
BuildRequires:  cblas-devel
BuildRequires:  gcc-fortran
BuildRequires:  gtest
%else
BuildRequires:  gtest-devel
%endif

%endif

# NOTE(review): rocm_set_ninja is defined outside this file; presumably it
# provides cmake_generator used in the build section -- confirm.
%rocm_set_ninja

# Keep the plain "rocblas" name available regardless of what rocblas_name is
Provides:       rocblas = %{version}-%{release}

# Only x86_64 works right now:
ExclusiveArch:  x86_64

%description
rocBLAS is the AMD library for Basic Linear Algebra Subprograms
(BLAS) on the ROCm platform. It is implemented in the HIP
programming language and optimized for AMD GPUs.

%post -p /sbin/ldconfig
%postun -p /sbin/ldconfig

%package devel
# Headers, cmake config files and the unversioned library link; depends on
# the exact same build of the library and on the hip cmake package.
Summary:        Libraries and headers for %{name}
Requires:       %{name}%{?_isa} = %{version}-%{release}
Requires:       cmake(hip)
Provides:       rocblas-devel = %{version}-%{release}

%description devel
%{summary}

%package -n rocblas-tensile-%{?_flavor}
# Per-flavor Tensile kernel package. Its file list is only emitted for flavor
# builds with tensile enabled; it requires the library by version only.
Summary:       ROCBlas Tensile Module for %{?__flavor} Architecture
Requires:      %{name} = %version

%description -n rocblas-tensile-%{?_flavor}
BLAS architecture modules for %{?__flavor} AMDGPU architectures

%if %{with test}
%package test
# Client test and benchmark binaries
Summary:        Tests for %{name}
Requires:       diffutils
Requires:       %{name}%{?_isa} = %{version}-%{release}

%description test
%{summary}
%endif

%if %{with gfx950}

%package gfx950
# Experimental gfx950 variant of the library; conflicts with the regular package
Summary:        The gfx950 rocBLAS package
Provides:       rocblas-gfx950 = %{version}-%{release}
Conflicts:      %{name}

%description gfx950
%{summary}

%package gfx950-devel
# Development files matching the gfx950 library; conflicts with the regular devel package
Summary:        The gfx950 rocBLAS development package
Requires:       %{name}-gfx950%{?_isa} = %{version}-%{release}
Provides:       rocblas-gfx950-devel = %{version}-%{release}
Conflicts:      %{name}-devel

%description gfx950-devel
%{summary}

%if %{with test}
%package gfx950-test
# Test binaries built against the gfx950 library; conflicts with the regular test package
Summary:        The gfx950 rocBLAS test package
Requires:       %{name}-gfx950%{?_isa} = %{version}-%{release}
Conflicts:      %{name}-test

%description gfx950-test
%{summary}

%endif # gfx950-test
%endif # gfx950

%prep
%autosetup -p1 -n %{upstreamname}-rocm-%{version}
# Link the clients against cblas instead of blas: change the BLAS_LIBRARY
# default and hard-code cblas in the rocblas-test link line
sed -i -e 's@set( BLAS_LIBRARY "blas" )@set( BLAS_LIBRARY "cblas" )@' clients/CMakeLists.txt
sed -i -e 's@target_link_libraries( rocblas-test PRIVATE ${BLAS_LIBRARY} ${GTEST_BOTH_LIBRARIES} roc::rocblas )@target_link_libraries( rocblas-test PRIVATE cblas ${GTEST_BOTH_LIBRARIES} roc::rocblas )@' clients/gtest/CMakeLists.txt

# no git in this build, so make the Git lookup optional
sed -i -e 's@find_package(Git REQUIRED)@find_package(Git)@' library/CMakeLists.txt

# On Tumbleweed Q2,2025
# /usr/include/gtest/internal/gtest-port.h:279:2: error: C++ versions less than C++14 are not supported.
#   279 | #error C++ versions less than C++14 are not supported.
# Convert the c++11's to c++14 (only the samples CMakeLists.txt is touched)
sed -i -e 's@CXX_STANDARD 11@CXX_STANDARD 14@' clients/samples/CMakeLists.txt

%if 0%{?suse_version}
# Suse's libgfortran.so for gcc 14 is here
# /usr/lib64/gcc/x86_64-suse-linux/14/libgfortran.so
# Without adding this path with -L, it isn't found, but thankfully it isn't really needed
# so the -lgfortran append is commented out for the benchmarks and gtest clients
sed -i -e 's@list( APPEND COMMON_LINK_LIBS "-lgfortran")@#list( APPEND COMMON_LINK_LIBS "-lgfortran")@' clients/{benchmarks,gtest}/CMakeLists.txt
%endif

%build

# With compat llvm the system clang is wrong
CLANG_PATH=`hipconfig --hipclangpath`
export TENSILE_ROCM_ASSEMBLER_PATH=${CLANG_PATH}/clang++
export TENSILE_ROCM_OFFLOAD_BUNDLER_PATH=${CLANG_PATH}/clang-offload-bundler
# Work around problem with koji's ld
export HIPCC_LINK_FLAGS_APPEND=-fuse-ld=lld

%if %{with tensile}
# TP is consumed by cmake_config as the prefix of Tensile_DIR
TP=`/usr/bin/TensileGetPath`
%endif

# CORES is consumed by cmake_config as Tensile_CPU_THREADS.
# Start from the 4th field of the lscpu "Core(s)" line; fall back to 1 when
# that yields nothing.
CORES=`lscpu | grep 'Core(s)' | awk '{ print $4 }'`
if [ ${CORES}x = x ]; then
    CORES=1
fi
# Try again..
# A value of 1 is retried with the 2nd field of the "CPU(s)" line, falling
# back to 4 when that yields nothing either.
if [ ${CORES} = 1 ]; then
    CORES=`lscpu | grep '^CPU(s)' | awk '{ print $2 }'`
    if [ ${CORES}x = x ]; then
        CORES=4
    fi
fi

%if %{with gfx950}

# The module is expected to provide ROCM_GPUS, ROCM_BIN, ROCM_INCLUDE and
# ROCM_LIB, which are used below; tensile is always off for this variant.
module load rocm/gfx950

%cmake %{cmake_generator} %{cmake_config} \
    -DGPU_TARGETS=${ROCM_GPUS} \
    -DBUILD_WITH_TENSILE=OFF \
    -DCMAKE_INSTALL_BINDIR=${ROCM_BIN} \
    -DCMAKE_INSTALL_INCLUDEDIR=${ROCM_INCLUDE} \
    -DCMAKE_INSTALL_LIBDIR=${ROCM_LIB}

%else # gfx950

# Only the flavor-less build of a flavor-capable (multibuild) setup skips the
# Tensile library, because the per-flavor builds package it separately.
# Without flavors the main package ships the Kernels and Tensile files itself
# and the check stage runs the tests, so the library must be built there.
%if %{use_flavors} && 0%{!?_flavor:1}
export TENSILE_SKIP_LIBRARY=true
%endif
%cmake %{cmake_generator} %{cmake_config} \
    -DGPU_TARGETS=%{gpu_list} \
    -DBUILD_WITH_TENSILE=%{build_tensile} \
    -DCMAKE_INSTALL_LIBDIR=%_libdir

%endif # gfx950

# Flavor builds only build the Tensile library target
%cmake_build %{?_flavor:TENSILE_LIBRARY_TARGET}
%if %{with gfx950}
module purge
%endif

%install
%if 0%{?_flavor:1}
# Flavor build: run only the install script of the TensileInstall
# subdirectory, so just the Tensile output lands in the buildroot
DESTDIR=%{buildroot} /usr/bin/cmake -P build/library/src/TensileInstall/cmake_install.cmake
%else
%cmake_install

# Drop the LICENSE.md copy under share/doc/rocblas when the install created
# one; the files section ships LICENSE.md through the license directive
if [ -f %{buildroot}%{_prefix}/share/doc/rocblas/LICENSE.md ]; then
    rm %{buildroot}%{_prefix}/share/doc/rocblas/LICENSE.md
fi
%endif

%check
# The test binary only runs when both the check and the test conditionals are
# on and the build does not use flavors.
%if %{with check} && %{with test} && ! %{use_flavors}
# Let the loader find the freshly built library in the build tree
export LD_LIBRARY_PATH=%{rocm_builddir}/library/src:$LD_LIBRARY_PATH
%{rocm_builddir}/clients/staging/rocblas-test --gtest_brief=1
%endif

%if %{with gfx950}
%files gfx950
# gfx950 variant: everything is installed below rocm/gfx950 inside libdir
%license LICENSE.md
%{_libdir}/rocm/gfx950/lib/librocblas.so.4{,.*}

%files gfx950-devel
# NOTE(review): only rocblas_module.f90 is listed from the include directory
# here, whereas the regular devel package takes the whole directory -- confirm
# that no headers are left unpackaged.
%dir %{_libdir}/rocm/gfx950/include/rocblas
%dir %{_libdir}/rocm/gfx950/lib/cmake/rocblas
%{_libdir}/rocm/gfx950/include/rocblas/rocblas_module.f90
%{_libdir}/rocm/gfx950/lib/librocblas.so
%{_libdir}/rocm/gfx950/lib/cmake/rocblas/*.cmake

%if %{with test}
%files gfx950-test
%{_libdir}/rocm/gfx950/bin/rocblas*
%endif

%else # gfx950

%if 0%{!?_flavor:1}
%files
# Main library package, emitted only when no flavor is set
%license LICENSE.md
%{_libdir}/librocblas.so.4{,.*}
%if %{with tensile}
# Without flavors the Tensile kernels ship in the main package; with flavors
# they go into the per-flavor rocblas-tensile packages instead
%if ! %{?use_flavors}
%dir %{_libdir}/rocblas
%dir %{_libdir}/rocblas/library
%{_libdir}/rocblas/library/Kernels*
%{_libdir}/rocblas/library/Tensile*
%endif
%endif # tensile

%files devel
%doc README.md
%dir %{_libdir}/cmake/rocblas
%dir %{_includedir}/rocblas
%{_includedir}/rocblas/*
%{_libdir}/cmake/rocblas/*.cmake
%{_libdir}/librocblas.so

%if %{with test}
%files test
%{_bindir}/rocblas*
%endif

%else # ?_flavor

%if %{with tensile}
%files -n rocblas-tensile-%{?_flavor}
# Flavor build: only the Tensile kernel files for this flavor are packaged
%dir %{_libdir}/rocblas
%dir %{_libdir}/rocblas/library
%{_libdir}/rocblas/library/Kernels*
%{_libdir}/rocblas/library/Tensile*
%endif

%endif # ?_flavor
%endif # gfx950

%changelog
* Thu Jun 12 2025 Egbert Eich <eich@suse.com> - 6.4.0-8
- Use distro specific macros to hide some distribution differences.
- Build and package core library and arch dependent
  tensile modules separately to parallelize the build.
- Restructure spec file (move bcond_with* settings to
  the top).
- Disable %%check stage if flavors are used.
- Fix build and runtime dependencies of test package.

* Wed Jun 11 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-8
- Remove suse check for using ldconfig

* Sun May 11 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-7
- Add experimental gfx950

* Tue May 6 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-6
- disable roctracer for everyone

* Tue Apr 29 2025 Tim Flink <tflink@fedoraproject.org> - 6.4.0-5
- add patch for option to disable roctracer logging
- disable roctracer logging for rhel builds
- allow for builds on rhel with ninja

* Tue Apr 29 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-4
- Improve testing for suse

* Sat Apr 26 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-3
- Add generic gpus

* Wed Apr 23 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-2
- Use joblib on sle 15.6 and 16.0

* Fri Apr 18 2025 Tom Rix <Tom.Rix@amd.com> - 6.4.0-1
- Update to 6.4.0

* Thu Apr 10 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-12
- Reenable ninja

* Fri Apr 4 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-11
- Use rocm-llvm strip

* Thu Feb 27 2025 Cristian Le <git@lecris.dev> - 6.3.0-10
- Add hip requirement to devel package

* Thu Feb 27 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-9
- Enable tensile for RHEL

* Wed Feb 26 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-8
- Enable tensile for SUSE

* Sun Feb 23 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-7
- Use tensile verbosity to avoid OSB timeout

* Wed Feb 19 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-6
- Use tensile cmake from the python location

* Tue Feb 11 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-5
- Remove multibuild
- Fix SLE 15.6

* Sat Jan 18 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-4
- multithread rpm compress

* Tue Jan 14 2025 Tom Rix <Tom.Rix@amd.com> - 6.3.0-3
- build requires gcc-c++

* Fri Dec 20 2024 Tom Rix <Tom.Rix@amd.com> - 6.3.0-2
- Build type should be release

* Fri Dec 6 2024 Tom Rix <Tom.Rix@amd.com> - 6.3.0-1
- Update to 6.3

* Sun Nov 10 2024 Tom Rix <Tom.Rix@amd.com> - 6.2.1-1
- Stub for tumbleweed


openSUSE Build Service is sponsored by