File 6181-otp_html_check-Add-option-to-print-external-links.patch of Package erlang

From 07dc13d06072adca535e81d33cf6d750855ae989 Mon Sep 17 00:00:00 2001
From: Johannes Christ <jc@jchri.st>
Date: Thu, 1 Jun 2023 12:36:09 +0200
Subject: [PATCH 1/2] otp_html_check: Add option to print external links

---
 scripts/otp_html_check | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/scripts/otp_html_check b/scripts/otp_html_check
index 3b9df7f8c6..043ed222d6 100755
--- a/scripts/otp_html_check
+++ b/scripts/otp_html_check
@@ -38,6 +38,7 @@ my @indexes =			# The order to try URL expansion
 my $html_ext = 'shtml|html|htm'; # HTML pages ends in these
 
 my @links;			# Set of [page,link] we want to check
+my @external_links;	        # Set of [page,link] for external links
 my @exclude;			# Pages/dir/prefix to exclude
 my %pages;			# Set of all files found in the file system
 				# limited by the script arguments.
@@ -59,6 +60,11 @@ my %anchor_defs;		# <a name="..."> in the form "$page#$anchor"
 
 @ARGV or usage("No base directory given");
 my $base = shift @ARGV;
+my @print_external_links = ();   # flag array: empty = off (a list holding 0 would still be true)
+if ($base eq "--print-external-links") {   # optional switch, recognised only as the first argument
+    @print_external_links = (1);   # non-empty = on
+    $base = shift @ARGV;           # the base directory follows the switch
+}
 -d $base or usage("Not a directory: $base");
 $base =~ m&^/& or usage("Has to be absolute path: $base");
 $base =~ s&/+$&&;		# Remove ending slash if any
@@ -276,7 +282,15 @@ sub normalize_link {
 	$link = "$dir/$mod";
     }
 
-    return $link if $link =~ /^\w{3,10}:/i; # mailto: http: .....
+    # mailto: http: .....
+    if ($link =~ /^\w{3,10}:/i) {
+        if ((@print_external_links)
+            && ($link !~ /github\.com\/erlang\/otp/) 
+            && ($link =~ /^http/)) {
+                push(@external_links, [$page, $link]);
+        }
+        return $link;
+    }
 
     $link =~ s/%([\da-fA-F]{2})/chr(hex($1))/eg; # Translate hex to char
 
@@ -407,6 +421,21 @@ if (keys %anchor_refs) {
     }
 }
 
+if (@print_external_links) {
+  print "\n**** External links (excluding github.com/erlang/otp)\n\n";
+
+  while (@external_links) {
+    my $page_and_link = shift @external_links;
+    my ($page, $link) = @$page_and_link;
+    if ($link =~ /^\w{3,10}:/i) {
+      # Every entry passed this scheme test when normalize_link
+      # collected it; github.com/erlang/otp ("Edit on GitHub")
+      # links were dropped there, so nothing is filtered here.
+      print "$page -> $link\n";
+    }
+  }
+}
+
 if (keys %missing || keys %anchor_refs) {
     exit 1;
 }
@@ -417,7 +446,7 @@ if (keys %missing || keys %anchor_refs) {
 sub usage {
   print STDERR "ERROR: ",join("\n",@_),"\n" if @_;
   print <<HERE;
-Usage: $0 BaseDirectory URL [ URLs... ] [ -- ExcludeURLs... ]
+Usage: $0 [ --print-external-links ] BaseDirectory URL [ URLs... ] [ -- ExcludeURLs... ]
 
 This script try to find out what files are used and not of your
 HTML documents, graphic files etc. It doesn't use HTTP, i.e. you
@@ -425,6 +454,9 @@ work off-line, so this script may fail to find a link. Javascripts
 and other extensions also makes it very hard. But for many sites
 it work very well.
 
+If --print-external-links is given, the script will print any links that it
+finds to external sites, excluding links to the Erlang/OTP GitHub repository.
+
 The base directory given has to start with a slash.
 
 For URLs and ExcludeURLs absolute paths or relative the base
-- 
2.35.3

openSUSE Build Service is sponsored by