File librsync-logn-sumset.patch of Package librsync

From: Victor Denisov ( victordenisov ) - 2012-09-24 10:07:15 PDT
URL: http://sourceforge.net/tracker/?func=detail&aid=3571263&group_id=56125&atid=479441
Subject: performance issue resolution for large files - ID: 3571263

When the files being rsynced are hundreds of gigabytes in size, collisions in
the hash table cripple librsync's performance.  Linear collision resolution
has therefore been replaced with O(log n) collision resolution based on binary
search.  The hash table has 65536 buckets, so for a file of size
(block_size * 65536 * t) each bucket holds about t entries, and linear
collision resolution is about t / log2(t) times slower than binary-search
resolution.  With a block size of 2048 bytes, the speed-up is about 630 times
for a 1 TB file and about 80 times for a 100 GB file.

Index: b/sumset.h
===================================================================
--- a/sumset.h
+++ b/sumset.h
@@ -39,6 +39,11 @@ typedef struct rs_target {
 
 typedef struct rs_block_sig rs_block_sig_t;
 
+typedef struct tag_table_entry {
+    int l;
+    int r;
+} tag_table_entry_t ;
+
 /*
  * This structure describes all the sums generated for an instance of
  * a file.  It incorporates some redundancy to make it easier to
@@ -50,8 +55,8 @@ struct rs_signature {
     int             remainder;	/* flength % block_length */
     int             block_len;	/* block_length */
     int             strong_sum_len;
-    rs_block_sig_t  *block_sigs; /* points to info for each chunk */
-    int             *tag_table;
+    rs_block_sig_t     *block_sigs; /* points to info for each chunk */
+    tag_table_entry_t  *tag_table;
     rs_target_t     *targets;
 };
openSUSE Build Service is sponsored by