File samtools-0.1.19-R-fixes.patch of Package samtools-legacy

diff -up samtools-0.1.19/faidx.c.Rfixes samtools-0.1.19/faidx.c
--- samtools-0.1.19/faidx.c.Rfixes	2015-05-29 23:25:42.646798526 -0400
+++ samtools-0.1.19/faidx.c	2015-05-29 23:28:55.628504008 -0400
@@ -94,6 +94,11 @@ faidx_t *fai_build_core(RAZF *rz)
 				}
 				name[l_name++] = c;
 			}
+			if (m_name < l_name + 2) { /* MTM: 0-length id */
+				m_name = l_name + 2;
+				kroundup32(m_name);
+				name = (char*)realloc(name, m_name);
+			}
 			name[l_name] = '\0';
 			if (ret == 0) {
 				fprintf(stderr, "[fai_build_core] the last entry has no sequence\n");
@@ -127,11 +132,24 @@ faidx_t *fai_build_core(RAZF *rz)
 			}
 		}
 	}
-	fai_insert_index(idx, name, len, line_len, line_blen, offset);
+	if (len < 0) {               /* MTM; should also check state */
+        fprintf(stderr, "[fai_build_core] no entries in file\n");
+        free(name); fai_destroy(idx);
+        return 0;
+    }
+    fai_insert_index(idx, name, len, line_len, line_blen, offset);
 	free(name);
 	return idx;
 }
 
+// HP - Jan 13, 2014: I've no idea why the original authors of the fai_save()
+// and fai_read() functions below decided to use the (long) type instead of
+// (long long) for the sequence offsets on Windows. Problem with this is that
+// these functions then break if the FASTA file contains sequences with offsets
+// > LONG_MAX which turns out to be 2^31-1 on Windows, hence not big enough if
+// the FASTA file contains the full genome sequences for Human and other
+// mammals. So I modified fai_save() and fai_read() to always use (long long).
+
 void fai_save(const faidx_t *fai, FILE *fp)
 {
 	khint_t k;
@@ -140,11 +158,12 @@ void fai_save(const faidx_t *fai, FILE *
 		faidx1_t x;
 		k = kh_get(s, fai->hash, fai->name[i]);
 		x = kh_value(fai->hash, k);
-#ifdef _WIN32
-		fprintf(fp, "%s\t%d\t%ld\t%d\t%d\n", fai->name[i], (int)x.len, (long)x.offset, (int)x.line_blen, (int)x.line_len);
-#else
+// HP - Jan 13, 2014: See above note.
+//#ifdef _WIN32
+//		fprintf(fp, "%s\t%d\t%ld\t%d\t%d\n", fai->name[i], (int)x.len, (long)x.offset, (int)x.line_blen, (int)x.line_len);
+//#else
 		fprintf(fp, "%s\t%d\t%lld\t%d\t%d\n", fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len);
-#endif
+//#endif
 	}
 }
 
@@ -153,22 +172,24 @@ faidx_t *fai_read(FILE *fp)
 	faidx_t *fai;
 	char *buf, *p;
 	int len, line_len, line_blen;
-#ifdef _WIN32
-	long offset;
-#else
+// HP - Jan 13, 2014: See above note.
+//#ifdef _WIN32
+//	long offset;
+//#else
 	long long offset;
-#endif
+//#endif
 	fai = (faidx_t*)calloc(1, sizeof(faidx_t));
 	fai->hash = kh_init(s);
 	buf = (char*)calloc(0x10000, 1);
 	while (!feof(fp) && fgets(buf, 0x10000, fp)) {
 		for (p = buf; *p && isgraph(*p); ++p);
 		*p = 0; ++p;
-#ifdef _WIN32
-		sscanf(p, "%d%ld%d%d", &len, &offset, &line_blen, &line_len);
-#else
+// HP - Jan 13, 2014: See above note.
+//#ifdef _WIN32
+//		sscanf(p, "%d%ld%d%d", &len, &offset, &line_blen, &line_len);
+//#else
 		sscanf(p, "%d%lld%d%d", &len, &offset, &line_blen, &line_len);
-#endif
+//#endif
 		fai_insert_index(fai, buf, len, line_len, line_blen, offset);
 	}
 	free(buf);
@@ -200,6 +221,10 @@ int fai_build(const char *fn)
 		return -1;
 	}
 	fai = fai_build_core(rz);
+    if (fai == NULL) {          /* MTM */
+        free(str);
+        return -1;
+    }
 	razf_close(rz);
 	fp = fopen(str, "wb");
 	if (fp == 0) {
@@ -258,10 +283,18 @@ FILE *download_and_open(const char *fn)
 faidx_t *fai_load(const char *fn)
 {
 	char *str;
-	FILE *fp;
 	faidx_t *fai;
 	str = (char*)calloc(strlen(fn) + 5, 1);
 	sprintf(str, "%s.fai", fn);
+	fai = fai_load0(fn, str);
+	free(str);
+	return fai;
+}
+
+faidx_t *fai_load0(const char *fn, const char *str)
+{
+	FILE *fp;
+	faidx_t *fai;
 
 #ifdef _USE_KNETFILE
     if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)
@@ -270,7 +303,6 @@ faidx_t *fai_load(const char *fn)
         if ( !fp )
         {
             fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\n", str);
-            free(str);
             return 0;
         }
     }
@@ -283,7 +315,6 @@ faidx_t *fai_load(const char *fn)
 		fp = fopen(str, "rb");
 		if (fp == 0) {
 			fprintf(stderr, "[fai_load] fail to open FASTA index.\n");
-			free(str);
 			return 0;
 		}
 	}
@@ -292,7 +323,6 @@ faidx_t *fai_load(const char *fn)
 	fclose(fp);
 
 	fai->rz = razf_open(fn, "rb");
-	free(str);
 	if (fai->rz == 0) {
 		fprintf(stderr, "[fai_load] fail to open FASTA file.\n");
 		return 0;
diff -up samtools-0.1.19/faidx.h.Rfixes samtools-0.1.19/faidx.h
--- samtools-0.1.19/faidx.h.Rfixes	2013-03-19 03:48:09.000000000 -0400
+++ samtools-0.1.19/faidx.h	2015-05-29 23:29:33.916247174 -0400
@@ -64,6 +64,13 @@ extern "C" {
 	faidx_t *fai_load(const char *fn);
 
 	/*!
+	  @abstract   Load index from "fn.fai".
+	  @param  fn  File name of the FASTA file
+          @param  index File name of the FASTA index
+	 */
+	faidx_t *fai_load0(const char *fn, const char *index);
+
+	/*!
 	  @abstract    Fetch the sequence in a region.
 	  @param  fai  Pointer to the faidx_t struct
 	  @param  reg  Region in the format "chr2:20,000-30,000"
@@ -96,6 +103,22 @@ extern "C" {
 	 */
 	char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len);
 
+	/*!
+	  @abstract    Alternative to faidx_fetch_seq().
+	  @param  fai  Pointer to the faidx_t struct
+	  @param  c_name Region name
+	  @param  p_beg_i  Beginning position number (zero-based)
+	  @param  p_end_i  End position number (zero-based)
+	  @param  out  User-supplied output buffer
+	  @return      Number of bytes written; -1 on failure
+
+	  @discussion Differences with faidx_fetch_seq(): (1) writes the
+	  incoming sequence to user-supplied output buffer, (2) doesn't write
+	  the terminating null byte ('\0'), (3) properly handles 0-length
+	  sequences, (4) returns the number of bytes written; -1 on failure.
+	 */
+	int faidx_fetch_seq2(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, char *out);
+
 #ifdef __cplusplus
 }
 #endif
openSUSE Build Service is sponsored by