File 0160-sbom-fixes-sbom-cache-duplication.patch of Package erlang

From 5605cded007514c34bf4fe031a8eb7d8b89ce2a5 Mon Sep 17 00:00:00 2001
From: Kiko Fernandez-Reyes <kiko@erlang.org>
Date: Fri, 19 Dec 2025 13:46:21 +0100
Subject: [PATCH] sbom: fixes sbom cache duplication

In the SBOM GH cache, some scan results have identical scanning
provenance and scanning options, yet they were executed at different
start times and cover different scanned files. As a result, tools such
as oss-review-toolkit cannot tell which of the scan results with the
same provenance and scanning options is the correct one.

This commit merges scan results that have the same scanning options
and provenance. The scan results are guaranteed to contain disjoint
files thanks to a previous deduplication pass, so merging scan results
with the same scanning options and provenance simply means combining
their licenses and copyrights.
---
 .github/scripts/ort-scanner.es | 50 ++++++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/.github/scripts/ort-scanner.es b/.github/scripts/ort-scanner.es
index 728b61ee14..2606a20749 100755
--- a/.github/scripts/ort-scanner.es
+++ b/.github/scripts/ort-scanner.es
@@ -186,14 +186,15 @@ sha1sums(GitRepo) ->
     %% Check which files on disk do not have the same sha1 as the files in the original scan
     FilesToRestore = check_files_to_restore(Input, Cache),
 
+    %% Restores from cache only the files with identical SHA1
     ScanResults = restore_from_cache(FilesToRestore,
                                      InputProvenance,
                                      Cache),
+    ScannersName = [maps:get(~"name", maps:get(~"scanner", R)) || R <- ScanResults],
 
     InputScanner = maps:get(~"scanner", Input),
     InputScanners = maps:get(~"scanners", InputScanner),
-    Scanners = [maps:get(~"name", maps:get(~"scanner", R)) || R <- ScanResults],
-    NewScanners = #{ K => Scanners || K := _ <- InputScanners},
+    NewScanners = #{ K => ScannersName || K := _ <- InputScanners},
 
     CacheConfig = maps:get(~"config", maps:get(~"scanner", Cache)),
 
@@ -428,8 +429,51 @@ replace_mappings(SPDX, Map) ->
     %% other side of NOASSERTION.
     replace_mappings(SPDX, maps:to_list(Map) ++ [{"AND NOASSERTION",""},{"NOASSERTION AND",""}]).
 
+%% Deduplicates scan results in the oss-review-toolkit (ORT) scan-result.json format.
 deduplicate(ScanResults) ->
-    deduplicate(ScanResults, []).
+    UniqueScanResults = deduplicate(ScanResults, []),
+
+    %% After deduplicate/2, several results can still share the exact same
+    %% provenance and scanner while covering different files. ORT cannot tell
+    %% which of two such results is the valid one, so they are merged into a
+    %% single result per provenance/scanner pair.
+    merge_identical_scan_results(UniqueScanResults).
+
+%% Merges scan results whose ~"provenance" and ~"scanner" entries are identical, because ORT
+%% cannot choose between results sharing both. Input is expected to come from `deduplicate/2`,
+%% i.e. with disjoint scanned files (not re-checked here). Output order is map-iteration order.
+merge_identical_scan_results(ScanResults) ->
+    Result = lists:foldl(
+               fun (#{~"provenance" := Provenance,
+                      ~"scanner" := Scanner,
+                      ~"summary" := Summary}, Acc) ->
+                       case maps:get({Provenance, Scanner}, Acc, false) of
+                           false ->  %% first result seen for this {Provenance, Scanner} key
+                               Acc#{{Provenance, Scanner} => Summary};
+                           SummaryAcc ->
+                               %% key already seen: concatenate copyrights and licenses so that
+                               %% ORT gets one result per key. NOTE(review): every other summary
+                               %% key is taken from the later result (Summary), not SummaryAcc.
+                               OldCp = maps:get(~"copyrights", SummaryAcc),
+                               NewCp = maps:get(~"copyrights", Summary),
+
+                               OldLs = maps:get(~"licenses", SummaryAcc),
+                               NewLs = maps:get(~"licenses", Summary),
+
+                               Acc#{{Provenance, Scanner} :=
+                                        Summary#{~"copyrights" := OldCp ++ NewCp,
+                                                 ~"licenses" := OldLs ++ NewLs}}
+                       end
+               end, #{}, ScanResults),  %% Result: #{{Provenance, Scanner} => merged summary}
+    maps:fold(fun ({Provenance, Scanner}, V, Acc) ->
+                      [#{~"summary" => V,  %% only these three keys are emitted per result
+                         ~"provenance" => Provenance,
+                         ~"scanner" => Scanner} | Acc]
+              end, [], Result).
+
+
+%% deduplicates licenses if already found in other scanners.
+%% output: scanners have unique files in their copyrights and licenses
 deduplicate([H | T], Found) ->
     #{ ~"summary" := #{ ~"copyrights" := CS, ~"licenses" := LS} = Summary} = H,
     FilteredCS = [C || C = #{ ~"location" := #{ ~"path" := Path }} <- CS,
-- 
2.51.0

openSUSE Build Service is sponsored by