File make-memory-hog-2.diff of Package make

GNU make 3.80 is a HUGE memory hog. It calls xstrdup to build the
dependency list. gnu-src-gcc.deps in libjava has 3000+ targets that all
depend on the same 3000+ files, whose filenames add up to more than 260K.
For this dependency alone, make takes 3000*260K == 761MB.

This patch is a quick hack. It reduces memory use from 1.6GB to around
600MB for "make -f gnu-src-gcc.deps". I think make should use better
memory management for strings. If my approach is OK, I can try to
use it throughout make.


H.J.
----

--- make-3.80/file.c.memory	2002-10-03 19:13:42.000000000 -0700
+++ make-3.80/file.c	2006-02-01 10:45:32.000000000 -0800
@@ -434,7 +434,7 @@ snap_deps ()
 	    if (d->file == 0)
 	      d->file = enter_file (d->name);
 	    else
-	      free (d->name);
+	      hash_strfree (d->name);
 	    d->name = 0;
 	  }
   free (file_slot_0);
--- make-3.80/implicit.c.memory	2002-09-04 00:26:19.000000000 -0700
+++ make-3.80/implicit.c	2006-02-01 10:45:32.000000000 -0800
@@ -539,7 +539,7 @@ pattern_search (file, archive, depth, re
 	      dep->file = enter_file (dep->name);
               /* enter_file uses dep->name _if_ we created a new file.  */
               if (dep->name != dep->file->name)
-                free (dep->name);
+                hash_strfree (dep->name);
 	      dep->name = 0;
 	      dep->file->tried_implicit |= dep->changed;
 	    }
--- make-3.80/main.c.memory	2002-08-09 18:27:17.000000000 -0700
+++ make-3.80/main.c	2006-02-01 10:45:32.000000000 -0800
@@ -501,6 +501,7 @@ initialize_global_hash_tables ()
   init_hash_files ();
   hash_init_directories ();
   hash_init_function_table ();
+  init_hash_strings ();
 }
 
 static struct file *
--- make-3.80/make.h.memory	2002-09-11 09:55:44.000000000 -0700
+++ make-3.80/make.h	2006-02-01 10:45:32.000000000 -0800
@@ -427,6 +427,11 @@ extern char *find_char_unquote PARAMS ((
 extern char *find_percent PARAMS ((char *));
 extern FILE *open_tmpfile PARAMS ((char **, const char *));
 
+extern void init_hash_strings PARAMS ((void));  /* Set up the shared-string table.  */
+extern char *hash_strdup PARAMS ((const char *));  /* Shared copy of a string.  */
+extern char *hash_savestring PARAMS ((const char *, unsigned int));  /* Same, for a prefix.  */
+extern void hash_strfree PARAMS ((char *));  /* Release a string from the two above.  */
+
 #ifndef NO_ARCHIVES
 extern int ar_name PARAMS ((char *));
 extern void ar_parse_name PARAMS ((char *, char **, char **));
--- make-3.80/misc.c.memory	2002-09-12 15:15:58.000000000 -0700
+++ make-3.80/misc.c	2006-02-01 11:05:44.000000000 -0800
@@ -18,8 +18,10 @@ along with GNU Make; see the file COPYIN
 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */
 
+#include <assert.h>
 #include "make.h"
 #include "dep.h"
+#include "hash.h"
 #include "debug.h"
 
 /* Variadic functions.  We go through contortions to allow proper function
@@ -564,7 +566,7 @@ copy_dep_chain (d)
       c = (struct dep *) xmalloc (sizeof (struct dep));
       bcopy ((char *) d, (char *) c, sizeof (struct dep));
       if (c->name != 0)
-	c->name = xstrdup (c->name);
+	c->name = hash_strdup (c->name);
       c->next = 0;
       if (firstnew == 0)
 	firstnew = lastnew = c;
@@ -891,3 +893,154 @@ atomic_readdir(dir)
 }
 
 #endif  /* HAVE_BROKEN_RESTART */
+
+/* Hash table of duplicated strings.  Each entry pairs one shared copy of a string with a count of its users.  */
+
+struct hash_string
+{
+  char *string;  /* The text; the only member the hash and compare callbacks read.  */
+  unsigned int count;  /* Copies handed out by hash_strdup/hash_savestring, less those released.  */
+};
+
+static unsigned long  /* First hash callback given to hash_init for `strings'.  */
+string_hash_1 (key)
+    const void *key;  /* A struct hash_string; only its `string' member is hashed.  */
+{  /* NOTE(review): return_ISTRING_HASH_1 presumably does the `return' itself; see hash.h.  */
+  return_ISTRING_HASH_1 (((const struct hash_string *) key)->string);
+}
+
+static unsigned long  /* Second hash callback given to hash_init for `strings'.  */
+string_hash_2 (key)
+    const void *key;  /* A struct hash_string; only its `string' member is hashed.  */
+{  /* NOTE(review): return_ISTRING_HASH_2 presumably does the `return' itself; see hash.h.  */
+  return_ISTRING_HASH_2 (((const struct hash_string *) key)->string);
+}
+
+static int  /* Comparison callback given to hash_init for `strings'.  */
+string_hash_cmp (x, y)
+    const void *x;  /* X and Y each point to a struct hash_string; only the */
+    const void *y;  /* `string' members are compared, `count' is ignored.  */
+{  /* NOTE(review): if ISTRING_* folds case on some hosts, names differing only in case share one entry -- confirm in hash.h.  */
+  return_ISTRING_COMPARE (((const struct hash_string *) x)->string,
+			  ((const struct hash_string *) y)->string);
+}
+
+static struct hash_table strings;  /* Entries are struct hash_string, looked up by their text.  */
+
+void  /* Prepare `strings' for use; this patch calls it from initialize_global_hash_tables.  */
+init_hash_strings ()
+{
+  hash_init (&strings, 1000, string_hash_1, string_hash_2,  /* 1000: presumably an initial size hint -- confirm in hash.h.  */
+	     string_hash_cmp);
+}
+
+/* Return a shared copy of the NUL-terminated string PTR.  If an equal
+   string is already in `strings', bump its reference count and return
+   the existing copy; otherwise store a new copy with a count of one.
+   An empty PTR yields the literal "", which is never stored or counted.
+   Release the result with hash_strfree, not free.  */
+
+char *
+hash_strdup (ptr)
+     const char *ptr;
+{
+  struct hash_string *h, key;
+
+  /* The literal is shared by every caller and never enters the table.  */
+  if (*ptr == '\0')
+    return "";
+
+  /* KEY only borrows PTR for the lookup; the cast drops const because
+     the `string' member is not const-qualified.  */
+  key.string = (char *) ptr;
+  key.count = 0;
+  h = (struct hash_string *) hash_find_item (&strings, &key);
+  if (h == NULL)
+    {
+      /* Allocate through xmalloc and xstrdup, as hash_savestring does,
+         so that running out of memory is reported in one place.  */
+      h = (struct hash_string *) xmalloc (sizeof (struct hash_string));
+      h->string = xstrdup (ptr);
+      h->count = 1;
+      hash_insert (&strings, h);
+    }
+  else
+    {
+      h->count++;
+      /* A count of zero here means the counter wrapped around.  */
+      assert (h->count != 0);
+    }
+
+  return h->string;
+}
+
+/* Like hash_strdup, but the string is the first LENGTH characters of STR:
+   return the shared, reference-counted copy from `strings', adding one if
+   needed.  Gives the literal "" if LENGTH is 0 or STR starts with a NUL.  */
+char *
+hash_savestring (str, length)
+     const char *str;
+     unsigned int length;
+{
+  struct hash_string *h, key;
+
+  if (length == 0 || *str == '\0')
+    return "";
+
+  /* The lookup needs a NUL-terminated key, so build a temporary one.  */
+  key.string = alloca (length + 1);
+  key.count = 0;
+  bcopy (str, key.string, length);
+  key.string [length] = '\0';
+
+  h = (struct hash_string *) hash_find_item (&strings, &key);
+  if (h == NULL)
+    {
+      char *out = (char *) xmalloc (length + 1);
+      bcopy (str, out, length);
+      out[length] = '\0';
+      /* xmalloc here too, so both allocations fail in the same way.  */
+      h = (struct hash_string *) xmalloc (sizeof (struct hash_string));
+      h->string = out;
+      h->count = 1;
+      hash_insert (&strings, h);
+    }
+  else
+    {
+      h->count++;
+      assert (h->count != 0);
+    }
+  return h->string;
+}
+
+/* Release PTR: drop one reference if it is a string held in `strings',
+   else free it.  NULL and "" are ignored ("" may be a literal).  */
+void
+hash_strfree (ptr)
+     char *ptr;
+{
+  struct hash_string *h, key;
+
+  if (ptr == NULL || *ptr == '\0')
+    return;
+
+  key.string = ptr;
+  key.count = 0;
+  h = (struct hash_string *) hash_find_item (&strings, &key);
+  /* Check if string comes from hash_strdup or hash_savestring.  */
+  if (h == NULL || h->string != ptr)
+    {
+      free (ptr);
+      return;
+    }
+
+  h->count--;
+  if (h->count == 0)
+    {
+      struct hash_string *d;
+      d = hash_delete (&strings, h);
+      assert (d == h);
+      free (h->string);
+      free (h);
+    }
+}
--- make-3.80/read.c.memory	2006-02-01 10:45:32.000000000 -0800
+++ make-3.80/read.c	2006-02-01 10:45:32.000000000 -0800
@@ -1871,8 +1871,8 @@ record_files (filenames, pattern, patter
                     fatal (flocp,
                            _("target `%s' leaves prerequisite pattern empty"),
                            name);
-		  free (d->name);
-		  d->name = savestring (buffer, o - buffer);
+		  hash_strfree (d->name);
+		  d->name = hash_savestring (buffer, o - buffer);
 		}
 	    }
 	}
@@ -2017,7 +2017,7 @@ record_files (filenames, pattern, patter
 	      while (d != 0)
 		{
 		  struct dep *nextd = d->next;
- 		  free (d->name);
+ 		  hash_strfree (d->name);
  		  free ((char *)d);
 		  d = nextd;
 		}