File 0002-More-gfx1151.patch of Package python-tensile

From bfbf4b5052ebcd4c59704bab0fde452f7529d965 Mon Sep 17 00:00:00 2001
From: Tom Rix <Tom.Rix@amd.com>
Date: Mon, 9 Dec 2024 06:21:13 -0800
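Subject: [PATCH 2/5] More gfx1151

Add gfx1151 to further places that enumerate the supported targets:
- Common.py: add (11,5,1) to isasWithDisabledHWMonitor.
- CMakeLists.txt: add gfx1151 to both default TENSILE_GPU_ARCHS lists.
- AMDGPU.hpp: add Processor::gfx1151, its toString() case and the
  "gfx1151" device-string match.
- PlaceholderLibrary.hpp: add LazyLoadingInit::gfx1151 and its
  "TensileLibrary_*_gfx1151" name pattern.
- OclUtils.cpp: add the "gfx1151" device-string match.
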
---
 Tensile/Common.py                                        | 2 +-
 Tensile/Source/CMakeLists.txt                            | 4 ++--
 Tensile/Source/lib/include/Tensile/AMDGPU.hpp            | 9 ++++++++-
 .../Source/lib/include/Tensile/PlaceholderLibrary.hpp    | 3 +++
 Tensile/Source/lib/source/ocl/OclUtils.cpp               | 4 ++++
 5 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/Tensile/Common.py b/Tensile/Common.py
index 3c1dab7cb255..b057dab33d7d 100644
--- a/Tensile/Common.py
+++ b/Tensile/Common.py
@@ -2460,7 +2460,7 @@ def assignGlobalParameters( config ):
     if os.name == "nt":
       globalParameters["CurrentISA"] = (9,0,6)
       printWarning("Failed to detect ISA so forcing (gfx906) on windows")
-  isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (12,0,0), (12,0,1))
+  isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,5,1), (12,0,0), (12,0,1))
   if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor:
     isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor))
     printWarning(f"HardwareMonitor currently disabled for {isaString}")
diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt
index e02b209a262a..f350b26caf7f 100644
--- a/Tensile/Source/CMakeLists.txt
+++ b/Tensile/Source/CMakeLists.txt
@@ -51,9 +51,9 @@ if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang
 endif()
 
 if(CMAKE_CXX_COMPILER STREQUAL "hipcc")
-  set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
+  set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1151 CACHE STRING "GPU architectures")
 else()
-  set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
+  set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1151 CACHE STRING "GPU architectures")
 endif()
 
 include(CMakeDependentOption)
diff --git a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
index 0ab8ced5cf5d..d83ee830d1da 100644
--- a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
+++ b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
@@ -73,7 +73,8 @@ namespace Tensile
             gfx1035 = 1035,
             gfx1100 = 1100,
             gfx1101 = 1101,
-            gfx1102 = 1102
+            gfx1102 = 1102,
+	    gfx1151 = 1151
         };
 
         static std::string toString(Processor p)
@@ -118,6 +119,8 @@ namespace Tensile
                 return "gfx1101";
             case AMDGPU::Processor::gfx1102:
                 return "gfx1102";
+	    case AMDGPU::Processor::gfx1151:
+                return "gfx1151";
             }
             return "";
         }
@@ -184,6 +187,10 @@ namespace Tensile
             {
                 return AMDGPU::Processor::gfx1102;
             }
+            else if(deviceString.find("gfx1151") != std::string::npos)
+            {
+                return AMDGPU::Processor::gfx1151;
+            }
             else
             {
                 return static_cast<AMDGPU::Processor>(0);
diff --git a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
index 10898ec2d1d6..f83713c04430 100644
--- a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
+++ b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp
@@ -58,6 +58,7 @@ namespace Tensile
         gfx1100,
         gfx1101,
         gfx1102,
+	gfx1151,
         All
     };
 
@@ -106,6 +107,8 @@ namespace Tensile
             return "TensileLibrary_*_gfx1101";
         case LazyLoadingInit::gfx1102:
             return "TensileLibrary_*_gfx1102";
+	case LazyLoadingInit::gfx1151:
+            return "TensileLibrary_*_gfx1151";
         case LazyLoadingInit::None:
             return "";
         }
diff --git a/Tensile/Source/lib/source/ocl/OclUtils.cpp b/Tensile/Source/lib/source/ocl/OclUtils.cpp
index 8ee6d21769f0..ff04c56a1025 100644
--- a/Tensile/Source/lib/source/ocl/OclUtils.cpp
+++ b/Tensile/Source/lib/source/ocl/OclUtils.cpp
@@ -188,6 +188,10 @@ namespace Tensile
             {
                 return AMDGPU::Processor::gfx1102;
             }
+            else if(deviceString.find("gfx1151") != std::string::npos)
+            {
+                return AMDGPU::Processor::gfx1151;
+            }
             else
             {
                 return static_cast<AMDGPU::Processor>(0);
-- 
2.47.1

openSUSE Build Service is sponsored by