A new user interface for you! Read more...

File fsck_gfs2_cannot_repair_rgrps_resulting_from_gfs_grow_plus_gfs2_convert.patch of Package cluster

commit 8fde4dae1ee517ca1f2ce2c48d006797e59e923e
Author: Bob Peterson <rpeterso@redhat.com>
Date:   Wed Feb 2 08:08:53 2011 -0600

    fsck.gfs2: can't repair rgrps resulting from gfs_grow->gfs2_convert
    
    This patch expands the algorithm that repairs destroyed resource
    groups.  It adds more levels of checking and is able to determine
    the locations of resource groups that are not evenly spaced.
    
    rhbz#576640

diff --git a/gfs2/edit/savemeta.c b/gfs2/edit/savemeta.c
index 10658a2..655a3bc 100644
--- a/gfs2/edit/savemeta.c
+++ b/gfs2/edit/savemeta.c
@@ -201,7 +201,7 @@ static int save_block(int fd, int out_fd, uint64_t blk)
 
 	if (blk > last_fs_block) {
 		fprintf(stderr, "\nWarning: bad block pointer '0x%llx' "
-			"ignored in block (block %llu (%llx))",
+			"ignored in block (block %llu (0x%llx))",
 			(unsigned long long)blk,
 			(unsigned long long)block, (unsigned long long)block);
 		return 0;
@@ -322,7 +322,9 @@ static void save_indirect_blocks(int out_fd, osi_list_t *cur_list,
 		if (height != hgt) { /* If not at max height */
 			nbh = bread(&sbd, indir_block);
 			osi_list_add_prev(&nbh->b_altlist, cur_list);
-			brelse(nbh);
+			/* The buffer_head needs to be queued ahead, so
+			   don't release it!
+			   brelse(nbh);*/
 		}
 	} /* for all data on the indirect block */
 }
@@ -368,8 +370,8 @@ static void save_inode_data(int out_fd)
 	    (S_ISDIR(inode->i_di.di_mode) ||
 	     (gfs1 && inode->i_di.__pad1 == GFS_FILE_DIR)))
 		height++;
-	else if (height && !block_is_systemfile() &&
-		 !S_ISDIR(inode->i_di.di_mode))
+	else if (height && !(inode->i_di.di_flags & GFS2_DIF_SYSTEM) &&
+		 !block_is_systemfile() && !S_ISDIR(inode->i_di.di_mode))
 		height--;
 	osi_list_add(&metabh->b_altlist, &metalist[0]);
         for (i = 1; i <= height; i++){
diff --git a/gfs2/fsck/fsck.h b/gfs2/fsck/fsck.h
index b0e1efc..bc14b88 100644
--- a/gfs2/fsck/fsck.h
+++ b/gfs2/fsck/fsck.h
@@ -84,9 +84,12 @@ enum rgindex_trust_level { /* how far can we trust our RG index? */
 	open_minded = 2, /* At least 1 RG is corrupt. Try to calculate what it
 			    should be, in a perfect world where our RGs are all
 			    on even boundaries. Blue sky. Chirping birds. */
-	distrust = 3   /* The world isn't perfect, our RGs are not on nice neat
+	distrust = 3,  /* The world isn't perfect, our RGs are not on nice neat
 			  boundaries.  The fs must have been messed with by
 			  gfs2_grow or something.  Count the RGs by hand. */
+	indignation = 4 /* Not only do we have corruption, but the rgrps
+			   aren't on even boundaries, so this file system
+			   must have been converted from gfs1 by gfs2_convert. */
 };
 
 extern struct gfs2_inode *fsck_load_inode(struct gfs2_sbd *sbp, uint64_t block);
diff --git a/gfs2/fsck/initialize.c b/gfs2/fsck/initialize.c
index 14cc9f6..469f022 100644
--- a/gfs2/fsck/initialize.c
+++ b/gfs2/fsck/initialize.c
@@ -143,7 +143,8 @@ static int set_block_ranges(struct gfs2_sbd *sdp)
 	{
 		rgd = osi_list_entry(tmp, struct rgrp_list, list);
 		ri = &rgd->ri;
-		if (ri->ri_data0 + ri->ri_data - 1 > rmax)
+		if (ri->ri_data0 + ri->ri_data &&
+		    ri->ri_data0 + ri->ri_data - 1 > rmax)
 			rmax = ri->ri_data0 + ri->ri_data - 1;
 		if (!rmin || ri->ri_data0 < rmin)
 			rmin = ri->ri_data0;
@@ -402,6 +403,20 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
 	int rgcount, sane = 1;
 	enum rgindex_trust_level trust_lvl;
 	uint64_t addl_mem_needed;
+	const char *level_desc[] = {
+		_("Checking if all rgrp and rindex values are good"),
+		_("Checking if rindex values are ascending and evenly spaced"),
+		_("Calculating where the rgrps should be if evenly spaced"),
+		_("Trying to rebuild rindex assuming evenly spaced rgrps"),
+		_("Trying to rebuild rindex assuming unevenly spaced rgrps"),
+	};
+	const char *fail_desc[] = {
+		_("Some damage was found; we need to take remedial measures"),
+		_("rindex is unevenly spaced: converted from gfs1 or corrupt"),
+		_("rindex calculations don't match: uneven rgrp boundaries"),
+		_("Too many rgrp misses: rgrps must be unevenly spaced"),
+		_("Too much damage found: we cannot rebuild this rindex"),
+	};
 
 	/*******************************************************************
 	 ******************  Initialize important inodes  ******************
@@ -442,18 +457,23 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
 	 ********  Validate and read in resource group information  ********
 	 *******************************************************************/
 	log_warn( _("Validating Resource Group index.\n"));
-	for (trust_lvl = blind_faith; trust_lvl <= distrust; trust_lvl++) {
-		log_warn( _("Level %d RG check.\n"), trust_lvl + 1);
+	for (trust_lvl = blind_faith; trust_lvl <= indignation; trust_lvl++) {
+		log_warn( _("Level %d rgrp check: %s.\n"), trust_lvl + 1,
+			  level_desc[trust_lvl]);
 		if ((rg_repair(sdp, trust_lvl, &rgcount, &sane) == 0) &&
 		    (ri_update(sdp, 0, &rgcount, &sane) == 0)) {
 			log_warn( _("(level %d passed)\n"), trust_lvl + 1);
 			break;
 		}
 		else
-			log_err( _("(level %d failed)\n"), trust_lvl + 1);
+			log_err( _("(level %d failed: %s)\n"), trust_lvl + 1,
+				 fail_desc[trust_lvl]);
+		if (fsck_abort)
+			break;
 	}
-	if (trust_lvl > distrust) {
-		log_err( _("RG recovery impossible; I can't fix this file system.\n"));
+	if (trust_lvl > indignation) {
+		log_err( _("Resource Group recovery impossible; I can't fix "
+			   "this file system.\n"));
 		return -1;
 	}
 	log_info( _("%u resource groups found.\n"), rgcount);
diff --git a/gfs2/fsck/main.c b/gfs2/fsck/main.c
index 222219f..c68e3da 100644
--- a/gfs2/fsck/main.c
+++ b/gfs2/fsck/main.c
@@ -334,7 +334,8 @@ int main(int argc, char **argv)
 		error = FSCK_CANCELED;
 	}
 
-	check_statfs(sbp);
+	if (!fsck_abort)
+		check_statfs(sbp);
 
 	/* Free up our system inodes */
 	inode_put(&sbp->md.inum);
diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c
index ff348e4..cffff8a 100644
--- a/gfs2/fsck/metawalk.c
+++ b/gfs2/fsck/metawalk.c
@@ -35,7 +35,7 @@ int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk,
 
 	old_bitmap_state = gfs2_get_bitmap(sdp, blk, rgd);
 	if (old_bitmap_state < 0) {
-		log_err( _("Block %lld (0x%llx) is not represented in the"
+		log_err( _("Block %lld (0x%llx) is not represented in the "
 			   "system bitmap; part of an rgrp or superblock.\n"),
 			 (unsigned long long)blk, (unsigned long long)blk);
 		return -1;
@@ -726,6 +726,10 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
 			if (pass->check_dentry && S_ISDIR(ip->i_di.di_mode)) {
 				error = check_entries(ip, lbh, DIR_EXHASH,
 						      &count, pass);
+
+				if (skip_this_pass || fsck_abort)
+					return 0;
+
 				if(error < 0) {
 					stack;
 					brelse(lbh);
diff --git a/gfs2/fsck/pass2.c b/gfs2/fsck/pass2.c
index 35acba4..90c6940 100644
--- a/gfs2/fsck/pass2.c
+++ b/gfs2/fsck/pass2.c
@@ -544,6 +544,8 @@ static int check_system_dir(struct gfs2_inode *sysinode, const char *dirname,
 		}
 	}
 	error = check_dir(sysinode->i_sbd, iblock, &pass2_fxns);
+	if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+		return FSCK_OK;
 	if(error < 0) {
 		stack;
 		return -1;
@@ -654,18 +656,26 @@ int pass2(struct gfs2_sbd *sbp)
 		stack;
 		return FSCK_ERROR;
 	}
+	if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+		return FSCK_OK;
 	if (check_system_dir(sbp->md.pinode, "per_node", build_per_node)) {
 		stack;
 		return FSCK_ERROR;
 	}
+	if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+		return FSCK_OK;
 	if (check_system_dir(sbp->master_dir, "master", build_master)) {
 		stack;
 		return FSCK_ERROR;
 	}
+	if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+		return FSCK_OK;
 	if (check_system_dir(sbp->md.rooti, "root", build_root)) {
 		stack;
 		return FSCK_ERROR;
 	}
+	if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+		return FSCK_OK;
 	log_info( _("Checking directory inodes.\n"));
 	/* Grab each directory inode, and run checks on it */
 	for(dirblk = 0; dirblk < last_fs_block; dirblk++) {
@@ -699,6 +709,8 @@ int pass2(struct gfs2_sbd *sbp)
 			}
 		}
 		error = check_dir(sbp, dirblk, &pass2_fxns);
+		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+			return FSCK_OK;
 		if(error < 0) {
 			stack;
 			return FSCK_ERROR;
diff --git a/gfs2/fsck/rgrepair.c b/gfs2/fsck/rgrepair.c
index 705ba86..af5a02d 100644
--- a/gfs2/fsck/rgrepair.c
+++ b/gfs2/fsck/rgrepair.c
@@ -15,6 +15,9 @@
 int rindex_modified = FALSE;
 struct special_blocks false_rgrps;
 
+#define BAD_RG_PERCENT_TOLERANCE 11
+#define AWAY_FROM_BITMAPS 0x1000
+
 #define ri_equal(ondisk, expected, field) (ondisk.field == expected.field)
 
 #define ri_compare(rg, ondisk, expected, field, fmt)	\
@@ -47,7 +50,7 @@ static void find_journaled_rgs(struct gfs2_sbd *sdp)
 
 	osi_list_init(&false_rgrps.list);
 	for (j = 0; j < sdp->md.journals; j++) {
-		log_debug( _("Checking for RGs in journal%d.\n"), j);
+		log_debug( _("Checking for rgrps in journal%d.\n"), j);
 		ip = sdp->md.journal[j];
 		jblocks = ip->i_di.di_size / sdp->sd_sb.sb_bsize;
 		for (b = 0; b < jblocks; b++) {
@@ -56,7 +59,7 @@ static void find_journaled_rgs(struct gfs2_sbd *sdp)
 				break;
 			bh = bread(sdp, dblock);
 			if (!gfs2_check_meta(bh, GFS2_METATYPE_RG)) {
-				log_debug( _("False RG found at block "
+				log_debug( _("False rgrp found at block "
 					  "0x%" PRIx64 "\n"), dblock);
 				gfs2_special_set(&false_rgrps, dblock);
 			}
@@ -73,66 +76,37 @@ static int is_false_rg(uint64_t block)
 }
 
 /*
- * gfs2_rindex_rebuild - rebuild a corrupt Resource Group (RG) index manually
- *                        where trust_lvl == distrust
- *
- * If this routine is called, it means we have RGs in odd/unexpected places,
- * and there is a corrupt RG or RG index entry.  It also means we can't trust
- * the RG index to be sane, and the RGs don't agree with how mkfs would have
- * built them by default.  So we have no choice but to go through and count 
- * them by hand.  We've tried twice to recover the RGs and RG index, and
- * failed, so this is our last chance to remedy the situation.
- *
- * This routine tries to minimize performance impact by:
- * 1. Skipping through the filesystem at known increments when possible.
- * 2. Shuffle through every block when RGs are not found at the predicted
- *    locations.
- *
- * Note: A GFS2 filesystem differs from a GFS1 file system in that there will
- * only be ONE chunk (i.e. no artificial subdevices on either size of the
- * journals).  The journals and even the rindex are kept as part of the file
- * system, so we need to rebuild that information by hand.  Also, with GFS1,
- * the different chunks ("subdevices") could have different RG sizes, which
- * made for quite a mess when trying to recover RGs.  GFS2 always uses the 
- * same RG size determined by the original mkfs, so recovery is easier.
+ * find_shortest_rgdist - hunt and peck for the shortest distance between RGs.
  *
+ * Sample several of them because an RG that's been blasted may
+ * look like twice the distance.  If we can find 6 of them, that
+ * should be enough to figure out the correct layout.
+ * This also figures out first_rg_dist since that's always different.
  */
-static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
-			 int *num_rgs)
+static uint64_t find_shortest_rgdist(struct gfs2_sbd *sdp,
+				     uint64_t *initial_first_rg_dist,
+				     uint64_t *first_rg_dist)
 {
+	uint64_t blk, block_of_last_rg, shortest_dist_btwn_rgs;
 	struct gfs2_buffer_head *bh;
-	uint64_t shortest_dist_btwn_rgs;
-	uint64_t blk, block_of_last_rg;
-	uint64_t fwd_block, block_bump;
-	uint64_t first_rg_dist, initial_first_rg_dist;
-	struct rgrp_list *calc_rgd, *prev_rgd;
-	int number_of_rgs, rgi;
+	int number_of_rgs = 0;
 	struct gfs2_rindex buf, tmpndx;
-	int rg_was_fnd = FALSE, corrupt_rgs = 0, bitmap_was_fnd;
-	osi_list_t *tmp;
 
 	/* Figure out if there are any RG-looking blocks in the journal we
 	   need to ignore. */
 	find_journaled_rgs(sdp);
-	osi_list_init(ret_list);
-	number_of_rgs = 0;
-	initial_first_rg_dist = first_rg_dist = sdp->sb_addr + 1;
+
+	*initial_first_rg_dist = *first_rg_dist = sdp->sb_addr + 1;
 	block_of_last_rg = sdp->sb_addr + 1;
-	/* ------------------------------------------------------------- */
-	/* First, hunt and peck for the shortest distance between RGs.   */
-	/* Sample several of them because an RG that's been blasted may  */
-	/* look like twice the distance.  If we can find 6 of them, that */
-	/* should be enough to figure out the correct layout.            */
-	/* ------------------------------------------------------------- */
 	shortest_dist_btwn_rgs = sdp->device.length;
+
 	for (blk = sdp->sb_addr + 1;
-	     blk < sdp->device.length && number_of_rgs < 6;
-	     blk++) {
+	     blk < sdp->device.length && number_of_rgs < 6; blk++) {
 		bh = bread(sdp, blk);
 		if (((blk == sdp->sb_addr + 1) ||
 		    (!gfs2_check_meta(bh, GFS2_METATYPE_RG))) &&
 		    !is_false_rg(blk)) {
-			log_debug( _("RG found at block 0x%" PRIx64 "\n"), blk);
+			log_debug( _("rgrp found at block 0x%" PRIx64 "\n"), blk);
 			if (blk > sdp->sb_addr + 1) {
 				uint64_t rgdist;
 				
@@ -149,8 +123,8 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 				/* from the rest because of the superblock   */
 				/* and 64K dead space.                       */
 				/* ----------------------------------------- */
-				if (first_rg_dist == initial_first_rg_dist)
-					first_rg_dist = rgdist;
+				if (*first_rg_dist == *initial_first_rg_dist)
+					*first_rg_dist = rgdist;
 				if (rgdist < shortest_dist_btwn_rgs) {
 					shortest_dist_btwn_rgs = rgdist;
 					log_debug( _("(shortest so far)\n"));
@@ -164,18 +138,15 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 		}
 		brelse(bh);
 	}
-	number_of_rgs = 0;
-	gfs2_special_free(&false_rgrps);
-
 	/* -------------------------------------------------------------- */
 	/* Sanity-check our first_rg_dist. If RG #2 got nuked, the        */
 	/* first_rg_dist would measure from #1 to #3, which would be bad. */
 	/* We need to take remedial measures to fix it (from the index).  */
 	/* -------------------------------------------------------------- */
-	log_debug( _("First RG distance: 0x%" PRIx64 "\n"), first_rg_dist);
-	log_debug( _("Distance between RGs: 0x%" PRIx64 "\n"),
+	log_debug( _("First rgrp distance: 0x%" PRIx64 "\n"), *first_rg_dist);
+	log_debug( _("Distance between rgrps: 0x%" PRIx64 "\n"),
 		  shortest_dist_btwn_rgs);
-	if (first_rg_dist >= shortest_dist_btwn_rgs +
+	if (*first_rg_dist >= shortest_dist_btwn_rgs +
 	    (shortest_dist_btwn_rgs / 4)) {
 		/* read in the second RG index entry for this subd. */
 		gfs2_readi(sdp->md.riinode, (char *)&buf,
@@ -183,28 +154,302 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 			   sizeof(struct gfs2_rindex));
 		gfs2_rindex_in(&tmpndx, (char *)&buf);
 		if (tmpndx.ri_addr > sdp->sb_addr + 1) { /* sanity check */
-			log_warn( _("RG 2 is damaged: getting dist from index: "));
-			first_rg_dist = tmpndx.ri_addr - (sdp->sb_addr + 1);
-			log_warn("0x%" PRIx64 "\n", first_rg_dist);
+			log_warn( _("rgrp 2 is damaged: getting dist from index: "));
+			*first_rg_dist = tmpndx.ri_addr - (sdp->sb_addr + 1);
+			log_warn("0x%" PRIx64 "\n", *first_rg_dist);
 		}
 		else {
-			log_warn( _("RG index 2 is damaged: extrapolating dist: "));
-			first_rg_dist = sdp->device.length -
+			log_warn( _("rgrp index 2 is damaged: extrapolating dist: "));
+			*first_rg_dist = sdp->device.length -
 				(sdp->rgrps - 1) *
 				(sdp->device.length / sdp->rgrps);
-			log_warn("0x%" PRIx64 "\n", first_rg_dist);
+			log_warn("0x%" PRIx64 "\n", *first_rg_dist);
 		}
-		log_debug( _("Adjusted first RG distance: 0x%" PRIx64 "\n"),
-			  first_rg_dist);
+		log_debug( _("Adjusted first rgrp distance: 0x%" PRIx64 "\n"),
+			  *first_rg_dist);
 	} /* if first RG distance is within tolerance */
+
+	gfs2_special_free(&false_rgrps);
+	return shortest_dist_btwn_rgs;
+}
+
+/*
+ * count_usedspace - count the bitmap entries in bh that are neither free nor unlinked
+ */
+static uint64_t count_usedspace(struct gfs2_sbd *sdp, int first,
+				struct gfs2_buffer_head *bh)
+{
+	int off, x, y, bytes_to_check;
+	uint32_t rg_used = 0;
+	unsigned int state;
+
+	/* Count up the used blocks; the bitmap starts after a gfs2_rgrp if "first", else after a meta header */
+	off = (first) ? sizeof(struct gfs2_rgrp) :
+		sizeof(struct gfs2_meta_header);
+	bytes_to_check = sdp->bsize - off;
+	for (x = 0; x < bytes_to_check; x++) {
+		unsigned char *byte;
+
+		byte = (unsigned char *)&bh->b_data[off + x];
+		if (*byte == 0x55) { /* fast path: every entry in this byte is counted as used */
+			rg_used += GFS2_NBBY;
+			continue;
+		}
+		if (*byte == 0x00) /* fast path: nothing in this byte is counted */
+			continue;
+		for (y = 0; y < GFS2_NBBY; y++) {
+			state = (*byte >> (GFS2_BIT_SIZE * y)) & GFS2_BIT_MASK;
+			if (state == GFS2_BLKST_FREE ||
+			    state == GFS2_BLKST_UNLINKED)
+				continue;
+			rg_used++;
+		}
+	}
+	return rg_used;
+}
+
+/*
+ * find_next_rgrp_dist - find the distance to the next rgrp
+ * @sdp: the superblock descriptor (rglist, bsize and device.length are used)
+ * @blk: block address of the current rgrp, looked up in sdp->rglist
+ * @prevrgd: rgrp whose on-disk bitmaps are analyzed if the rindex has no answer
+ *
+ * Only called if the rgrps are determined to be on uneven boundaries.  After
+ * mkfs.gfs2, all rgrps but the first two are the same distance from the
+ * previous rgrp (the first follows the superblock; the second is adjusted
+ * for the 64KB skipped at the start of the fs).  The only way to deviate is
+ * a gfs_grow on a gfs1 file system later converted with gfs2_convert.
+ *
+ * Returns: distance in blocks to the next rgrp (0 if nothing could be analyzed)
+ */
+static uint64_t find_next_rgrp_dist(struct gfs2_sbd *sdp, uint64_t blk,
+				    struct rgrp_list *prevrgd)
+{
+	uint64_t rgrp_dist = 0, used_blocks, block, next_block, twogigs;
+	osi_list_t *tmp;
+	struct rgrp_list *rgd = NULL, *next_rgd;
+	struct gfs2_buffer_head *bh;
+	struct gfs2_meta_header mh;
+	int first, length, b, found, mega_in_blocks;
+	uint32_t free_blocks = 0;
+
+	for (tmp = sdp->rglist.next; tmp != &sdp->rglist; tmp = tmp->next) {
+		rgd = osi_list_entry(tmp, struct rgrp_list, list);
+		if (rgd->ri.ri_addr == blk)
+			break;
+	}
+	if (rgd && tmp && tmp != &sdp->rglist &&
+	    tmp->next != &sdp->rglist && rgd->ri.ri_addr == blk) {
+		tmp = tmp->next; /* a real successor entry, never the list head */
+		next_rgd = osi_list_entry(tmp, struct rgrp_list, list);
+		rgrp_dist = next_rgd->ri.ri_addr - rgd->ri.ri_addr;
+		return rgrp_dist;
+	}
+	mega_in_blocks = (1024 * 1024)  / sdp->bsize;
+	twogigs = 2048 * mega_in_blocks;
+	/* Unfortunately, if we fall through to here we can't trust the
+	   rindex.  So we have to analyze the current rgrp to figure out
+	   the bare minimum block number where it ends. If we don't have
+	   rindex, all we know about this rgrp is what's on disk: its
+	   rg_free.  If we analyze the rgrp's bitmap and the bitmaps that
+	   follow, we can figure out how many bits are used.  If we add
+	   rg_free, we get the total number of blocks this rgrp
+	   represents.  After that should be the next rgrp, but it may
+	   skip a few blocks (hopefully no more than 4).  */
+	used_blocks = 0;
+	length = 0;
+	block = prevrgd->ri.ri_addr;
+	first = 1;
+	found = 0;
+	while (1) {
+		if (block >= sdp->device.length)
+			break;
+		if (block >= prevrgd->ri.ri_addr + twogigs)
+			break;
+		bh = bread(sdp, block);
+		gfs2_meta_header_in(&mh, bh);
+		if ((mh.mh_magic != GFS2_MAGIC) ||
+		    (first && mh.mh_type != GFS2_METATYPE_RG) ||
+		    (!first && mh.mh_type != GFS2_METATYPE_RB)) {
+			brelse(bh);
+			break;
+		}
+		if (first) {
+			struct gfs2_rgrp *rg;
+
+			rg = (struct gfs2_rgrp *)bh->b_data;
+			free_blocks = be32_to_cpu(rg->rg_free);
+		}
+		used_blocks += count_usedspace(sdp, first, bh);
+		first = 0;
+		block++;
+		length++;
+		brelse(bh);
+		/* Check if this distance points to an rgrp:
+		   We have to look for blocks that resemble rgrps and bitmaps.
+		   If they do, we need to count blocks used and free and see
+		   if adding that number of free blocks accounts for the
+		   next rgrp we find. Otherwise, you could have a length of
+		   6 with additional user blocks that just happen to look like
+		   bitmap blocks.  Count them all as bitmaps and you'll be
+		   hopelessly lost. */
+		rgrp_dist = used_blocks + free_blocks + length;
+		next_block = prevrgd->ri.ri_addr + rgrp_dist;
+		/* Account for mkfs.gfs2 block rounding; never probe past the device end */
+		for (b = 0; b <= length + GFS2_NBBY; b++) {
+			if (next_block + b >= sdp->device.length)
+				break;
+			bh = bread(sdp, next_block + b);
+			gfs2_meta_header_in(&mh, bh);
+			brelse(bh);
+			if (mh.mh_magic == GFS2_MAGIC) {
+				if (mh.mh_type == GFS2_METATYPE_RG) {
+					found = 1;
+					break;
+				}
+				/* if the first thing we find is a bitmap,
+				   there must be a damaged rgrp on the
+				   previous block. */
+				if (mh.mh_type == GFS2_METATYPE_RB) {
+					found = 1;
+					rgrp_dist--;
+					break;
+				}
+			}
+			rgrp_dist++;
+		}
+		if (found) {
+			block = next_block;
+			log_info( _("rgrp found at 0x%llx, length=%d, "
+				    "used=%llu, free=%u\n"),
+				  (unsigned long long)prevrgd->ri.ri_addr, length,
+				  (unsigned long long)used_blocks,
+				  free_blocks);
+			break;
+		}
+	}
+	return rgrp_dist;
+}
+
+/*
+ * hunt_and_peck - find the distance to the next rgrp by probing the disk
+ *
+ * This function is only called if the rgrps are determined to be on uneven
+ * boundaries, and also corrupt.  Returns the distance, in blocks, to use.
+ */
+static uint64_t hunt_and_peck(struct gfs2_sbd *sdp, uint64_t blk,
+			      struct rgrp_list *prevrgd, uint64_t last_bump)
+{
+	uint64_t rgrp_dist = 0, block, twogigs, last_block, last_meg;
+	struct gfs2_buffer_head *bh;
+	struct gfs2_meta_header mh;
+	int b, mega_in_blocks;
+
+	/* Skip ahead the previous amount: we might get lucky.
+	   If we're close to the end of the device, take the rest. */
+	if (gfs2_check_range(sdp, blk + last_bump))
+		return sdp->fssize - blk;
+
+	bh = bread(sdp, blk + last_bump);
+	gfs2_meta_header_in(&mh, bh);
+	brelse(bh);
+	if (mh.mh_magic == GFS2_MAGIC && mh.mh_type == GFS2_METATYPE_RG) {
+		log_info( _("rgrp found at 0x%llx, length=%lld\n"),
+			  (unsigned long long)blk + last_bump,
+			  (unsigned long long)last_bump);
+		return last_bump;
+	}
+
+	rgrp_dist = AWAY_FROM_BITMAPS; /* Get away from any bitmaps
+					  associated with the previous rgrp */
+	block = prevrgd->ri.ri_addr + rgrp_dist; /* NOTE(review): b below also starts at AWAY_FROM_BITMAPS; confirm the offset isn't doubled */
+	/* Now we account for block rounding done by mkfs.gfs2.  A rgrp can
+	   be at most 2GB in size, so that's where we call it. We do somewhat
+	   obscure math here to avoid integer overflows. */
+	mega_in_blocks = (1024 * 1024)  / sdp->bsize;
+	twogigs = 2048 * mega_in_blocks;
+	if (block + twogigs <= sdp->fssize) {
+		last_block = twogigs;
+		last_meg = 0;
+	} else {
+		/* There won't be a rgrp in the last megabyte. */
+		last_block = sdp->fssize - block - mega_in_blocks;
+		last_meg = mega_in_blocks;
+	}
+	for (b = AWAY_FROM_BITMAPS; b < last_block; b++) { /* rgrp_dist advances in step with b */
+		bh = bread(sdp, block + b);
+		gfs2_meta_header_in(&mh, bh);
+		brelse(bh);
+		if (mh.mh_magic == GFS2_MAGIC) {
+			if (mh.mh_type == GFS2_METATYPE_RG)
+				break;
+			/* if the first thing we find is a bitmap, there must
+			   be a damaged rgrp on the previous block. */
+			if (mh.mh_type == GFS2_METATYPE_RB) {
+				rgrp_dist--;
+				break;
+			}
+		}
+		rgrp_dist++;
+	}
+	return rgrp_dist + last_meg; /* last_meg is nonzero only when the search window was cut short near the end of the fs */
+}
+
+/*
+ * gfs2_rindex_rebuild - rebuild a corrupt Resource Group (RG) index manually
+ *                        where trust_lvl == distrust
+ *
+ * If this routine is called, it means we have RGs in odd/unexpected places,
+ * and there is a corrupt RG or RG index entry.  It also means we can't trust
+ * the RG index to be sane, and the RGs don't agree with how mkfs would have
+ * built them by default.  So we have no choice but to go through and count
+ * them by hand.  We've tried twice to recover the RGs and RG index, and
+ * failed, so this is our last chance to remedy the situation.
+ *
+ * This routine tries to minimize performance impact by:
+ * 1. Skipping through the filesystem at known increments when possible.
+ * 2. Shuffle through every block when RGs are not found at the predicted
+ *    locations.
+ *
+ * Note: A GFS2 filesystem differs from a GFS1 file system in that there will
+ * only be ONE chunk (i.e. no artificial subdevices on either size of the
+ * journals).  The journals and even the rindex are kept as part of the file
+ * system, so we need to rebuild that information by hand.  Also, with GFS1,
+ * the different chunks ("subdevices") could have different RG sizes, which
+ * made for quite a mess when trying to recover RGs.  GFS2 always uses the
+ * same RG size determined by the original mkfs, so recovery is easier.
+ *
+ * If "gfs_grow" is specified the file system was most likely converted
+ * from gfs1 to gfs2 after a gfs_grow operation.  In that case, the rgrps
+ * will not be on predictable boundaries.
+ */
+static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
+			       int *num_rgs, int gfs_grow)
+{
+	struct gfs2_buffer_head *bh;
+	uint64_t shortest_dist_btwn_rgs;
+	uint64_t blk;
+	uint64_t fwd_block, block_bump;
+	uint64_t first_rg_dist, initial_first_rg_dist;
+	struct rgrp_list *calc_rgd, *prev_rgd;
+	int number_of_rgs, rgi;
+	int rg_was_fnd = FALSE, corrupt_rgs = 0, bitmap_was_fnd;
+	osi_list_t *tmp;
+
+	osi_list_init(ret_list);
+	initial_first_rg_dist = first_rg_dist = sdp->sb_addr + 1;
+	shortest_dist_btwn_rgs = find_shortest_rgdist(sdp,
+						      &initial_first_rg_dist,
+						      &first_rg_dist);
+	number_of_rgs = 0;
 	/* -------------------------------------------------------------- */
 	/* Now go through the RGs and verify their integrity, fixing as   */
 	/* needed when corruption is encountered.                         */
 	/* -------------------------------------------------------------- */
 	prev_rgd = NULL;
 	block_bump = first_rg_dist;
-	for (blk = sdp->sb_addr + 1; blk <= sdp->device.length;
-	     blk += block_bump) {
+	blk = sdp->sb_addr + 1;
+	while (blk <= sdp->device.length) {
 		log_debug( _("Block 0x%" PRIx64 "\n"), blk);
 		bh = bread(sdp, blk);
 		rg_was_fnd = (!gfs2_check_meta(bh, GFS2_METATYPE_RG));
@@ -212,7 +457,7 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 		/* Allocate a new RG and index. */
 		calc_rgd = malloc(sizeof(struct rgrp_list));
 		if (!calc_rgd) {
-			log_crit( _("Can't allocate memory for rg repair.\n"));
+			log_crit( _("Can't allocate memory for rgrp repair.\n"));
 			return -1;
 		}
 		memset(calc_rgd, 0, sizeof(struct rgrp_list));
@@ -225,11 +470,13 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 			/* ------------------------------------------------- */
 			corrupt_rgs++;
 			if (corrupt_rgs < 5)
-				log_debug( _("Missing or damaged RG at block %" 
+				log_debug( _("Missing or damaged rgrp at block %"
 					  PRIu64 " (0x%" PRIx64 ")\n"),
 					  blk, blk);
 			else {
-				log_crit( _("Error: too many bad RGs.\n"));
+				log_crit( _("Error: too many missing or "
+					    "damaged rgrps using this method. "
+					    "Time to try another method.\n"));
 				return -1;
 			}
 		}
@@ -261,6 +508,8 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 
 			prev_rgd->ri.ri_length = bitblocks;
 			prev_rgd->ri.ri_data = rgblocks;
+			prev_rgd->ri.ri_data0 = prev_rgd->ri.ri_addr +
+				prev_rgd->ri.ri_length;
 			prev_rgd->ri.ri_data -= prev_rgd->ri.ri_data %
 				GFS2_NBBY;
 			prev_rgd->ri.ri_bitbytes = prev_rgd->ri.ri_data /
@@ -269,18 +518,42 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 				  prev_rgd->ri.ri_data);
 		}
 		number_of_rgs++;
-		log_warn( _("%c RG %d at block 0x%" PRIX64 " %s"),
-			 (rg_was_fnd ? ' ' : '*'), number_of_rgs, blk,
-			 (rg_was_fnd ? "intact" : "*** DAMAGED ***"));
+		if (rg_was_fnd)
+			log_info( _("  rgrp %d at block 0x%llx intact"),
+				  number_of_rgs, (unsigned long long)blk);
+		else
+			log_warn( _("* rgrp %d at block 0x%llx *** DAMAGED ***"),
+				  number_of_rgs, (unsigned long long)blk);
 		prev_rgd = calc_rgd;
-		block_of_last_rg = blk;
-
+		/*
+		 * Figure out where our next rgrp should be.
+		 */
 		if (blk == sdp->sb_addr + 1)
 			block_bump = first_rg_dist;
-		else
+		else if (!gfs_grow) {
 			block_bump = shortest_dist_btwn_rgs;
-		if (block_bump != 1)
-			log_warn( _(" [length 0x%" PRIx64 "]\n"), block_bump);
+			/* if we have uniformly-spaced rgrps, there may be
+			   some wasted space at the end of the device.
+			   Since we don't want to create a short rgrp and
+			   break our uniformity, just quit here. */
+			if (blk + (2 * block_bump) > sdp->device.length)
+				break;
+		} else if (rg_was_fnd)
+			block_bump = find_next_rgrp_dist(sdp, blk, prev_rgd);
+		else
+			block_bump = hunt_and_peck(sdp, blk, prev_rgd,
+						   block_bump);
+		if (block_bump != 1) {
+			if (rg_was_fnd)
+				log_info( _(" [length 0x%" PRIx64 "]\n"),
+					  block_bump);
+			else
+				log_warn( _(" [length 0x%" PRIx64 "]\n"),
+					  block_bump);
+		} else {
+			log_warn("\n");
+		}
+		blk += block_bump;
 	} /* for each rg block */
 	/* ----------------------------------------------------------------- */
 	/* If we got to the end of the fs, we still need to fix the          */
@@ -293,6 +566,8 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 		rgblocks2bitblocks(sdp->bsize, &rgblocks, &bitblocks);
 
 		prev_rgd->ri.ri_length = bitblocks;
+		prev_rgd->ri.ri_data0 = prev_rgd->ri.ri_addr +
+			prev_rgd->ri.ri_length;
 		prev_rgd->ri.ri_data = rgblocks;
 		prev_rgd->ri.ri_data -= prev_rgd->ri.ri_data % GFS2_NBBY;
 		prev_rgd->ri.ri_bitbytes = prev_rgd->ri.ri_data / GFS2_NBBY;
@@ -303,8 +578,8 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
         /* ---------------------------------------------- */
         /* Now dump out the information (if verbose mode) */      
         /* ---------------------------------------------- */
-        log_debug( _("RG index rebuilt as follows:\n"));
-        for (tmp = ret_list, rgi = 0; tmp != ret_list;
+        log_debug( _("rindex rebuilt as follows:\n"));
+        for (tmp = ret_list->next, rgi = 0; tmp != ret_list;
 	     tmp = tmp->next, rgi++) {
                 calc_rgd = osi_list_entry(tmp, struct rgrp_list, list);
                 log_debug("%d: 0x%llx / %x / 0x%llx"
@@ -334,8 +609,18 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 static int gfs2_rindex_calculate(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 			   int *num_rgs)
 {
+	uint64_t num_rgrps = 0;
+
+	/* ----------------------------------------------------------------- */
+	/* Calculate how many RGs there are supposed to be based on the      */
+	/* rindex filesize.  Remember that our trust level is open-minded    */
+	/* here.  If the filesize of the rindex file is not a multiple of    */
+	/* our rindex structures, then something's wrong and we can't trust  */
+	/* the index.                                                        */
+	/* ----------------------------------------------------------------- */
+	*num_rgs = sdp->md.riinode->i_di.di_size / sizeof(struct gfs2_rindex);
+
 	osi_list_init(ret_list);
-	sdp->rgsize = GFS2_DEFAULT_RGSIZE; /* compute_rgrp_layout adjusts */
 	if (device_geometry(sdp)) {
 		fprintf(stderr, _("Geometry error\n"));
 		exit(-1);
@@ -346,21 +631,23 @@ static int gfs2_rindex_calculate(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 		exit(-1);
 	}
 
+	/* Try all possible rgrp sizes: 2048, 1024, 512, 256, 128, 64, 32 */
+	for (sdp->rgsize = GFS2_DEFAULT_RGSIZE; sdp->rgsize >= 32;
+	     sdp->rgsize /= 2) {
+		num_rgrps = how_many_rgrps(sdp, &sdp->device, TRUE);
+		if (num_rgrps == *num_rgs) {
+			log_info(_("rgsize must be: %lld (0x%llx)\n"),
+				 (unsigned long long)sdp->rgsize,
+				 (unsigned long long)sdp->rgsize);
+			break;
+		}
+	}
 	/* Compute the default resource group layout as mkfs would have done */
-	compute_rgrp_layout(sdp, FALSE);
+	compute_rgrp_layout(sdp, TRUE);
 	build_rgrps(sdp, FALSE); /* FALSE = calc but don't write to disk. */
-	*num_rgs = 0;
 	log_debug( _("fs_total_size = 0x%" PRIX64 " blocks.\n"),
 		  sdp->device.length);
-	/* ----------------------------------------------------------------- */
-	/* Calculate how many RGs there are supposed to be based on the      */
-	/* rindex filesize.  Remember that our trust level is open-minded    */
-	/* here.  If the filesize of the rindex file is not a multiple of    */
-	/* our rindex structures, then something's wrong and we can't trust  */
-	/* the index.                                                        */
-	/* ----------------------------------------------------------------- */
-	*num_rgs = sdp->md.riinode->i_di.di_size / sizeof(struct gfs2_rindex);
-	log_warn( _("L2: number of rgs in the index = %d.\n"), *num_rgs);
+	log_warn( _("L3: number of rgs in the index = %d.\n"), *num_rgs);
 	/* Move the rg list to the return list */
 	ret_list->next = sdp->rglist.next;
 	ret_list->prev = sdp->rglist.prev;
@@ -377,14 +664,14 @@ static int rewrite_rg_block(struct gfs2_sbd *sdp, struct rgrp_list *rg,
 		     uint64_t errblock)
 {
 	int x = errblock - rg->ri.ri_addr;
+	const char *typedesc = x ? "GFS2_METATYPE_RB" : "GFS2_METATYPE_RG";
 
-	log_err( _("Block #%lld (0x%llx) (%d of %d) is neither"
-		" GFS2_METATYPE_RB nor GFS2_METATYPE_RG.\n"),
+	log_err( _("Block #%lld (0x%llx) (%d of %d) is not %s.\n"),
 		 (unsigned long long)rg->ri.ri_addr + x,
 		 (unsigned long long)rg->ri.ri_addr + x,
-		 (int)x+1, (int)rg->ri.ri_length);
+		 (int)x+1, (int)rg->ri.ri_length, typedesc);
 	if (query( _("Fix the Resource Group? (y/n)"))) {
-		log_err( _("Attempting to repair the RG.\n"));
+		log_err( _("Attempting to repair the rgrp.\n"));
 		rg->bh[x] = bread(sdp, rg->ri.ri_addr + x);
 		if (x) {
 			struct gfs2_meta_header mh;
@@ -434,6 +721,7 @@ static int expect_rindex_sanity(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 		memcpy(&exp->ri, &rgd->ri, sizeof(exp->ri));
 		memcpy(&exp->rg, &rgd->rg, sizeof(exp->rg));
 		exp->bits = NULL;
+		exp->bh = NULL;
 		gfs2_compute_bitstructs(sdp, exp);
 		osi_list_add_prev(&exp->list, ret_list);
 	}
@@ -442,21 +730,59 @@ static int expect_rindex_sanity(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 }
 
 /*
+ * sort_rgrp_list - sort the rgrp list into ascending ri_addr order
+ *
+ * Repeated adjacent-swap passes until a pass makes no swap.  Crude, but we
+ * are talking about thousands, not millions of entries to sort, and the
+ * list will be almost sorted anyway, so there should be very few swaps.
+ */
+static void sort_rgrp_list(osi_list_t *head)
+{
+	struct rgrp_list *thisr, *nextr;
+	osi_list_t *tmp, *x, *next;
+	int swaps; /* out-of-order neighbours fixed during the current pass */
+
+	while(1) { /* one full pass over the list per iteration */
+		swaps = 0;
+		osi_list_foreach_safe(tmp, head, x) {
+			next = tmp->next;
+			if (next == head) /* at the end */
+				break;
+			thisr = osi_list_entry(tmp, struct rgrp_list, list);
+			nextr = osi_list_entry(next, struct rgrp_list, list);
+			if (thisr->ri.ri_addr > nextr->ri.ri_addr) {
+				osi_list_del(next); /* unlink the lower address */
+				osi_list_add_prev(next, tmp); /* put it before tmp */
+				swaps++;
+			}
+		}
+		if (!swaps) /* a swap-free pass means the list is sorted */
+			break;
+	}
+}
+
+/*
  * rg_repair - try to repair a damaged rg index (rindex)
  * trust_lvl - This is how much we trust the rindex file.
  *             blind_faith means we take the rindex at face value.
  *             open_minded means it might be okay, but we should verify it.
  *             distrust means it's not to be trusted, so we should go to
  *             greater lengths to build it from scratch.
+ *             indignation means we have corruption, but the file system
+ *             was converted from GFS via gfs2_convert, and its rgrps are
+ *             not on nice boundaries thanks to previous gfs_grow ops. Lovely.
  */
 int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count, int *sane)
 {
-	int error, discrepancies;
+	int error, discrepancies, percent;
 	osi_list_t expected_rglist;
 	int calc_rg_count = 0, rgcount_from_index, rg;
 	osi_list_t *exp, *act; /* expected, actual */
 	struct gfs2_rindex buf;
 
+	/* Free previous incarnations in memory, if any. */
+	gfs2_rgrp_free(&sdp->rglist);
+
 	if (trust_lvl == blind_faith)
 		return 0;
 	else if (trust_lvl == ye_of_little_faith) { /* if rindex seems sane */
@@ -467,8 +793,10 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count, int *sane)
 		}
 		error = expect_rindex_sanity(sdp, &expected_rglist,
 					     &calc_rg_count);
-		if (error)
+		if (error) {
+			gfs2_rgrp_free(&expected_rglist);
 			return error;
+		}
 	} else if (trust_lvl == open_minded) { /* If we can't trust RG index */
 		/* Calculate our own RG index for comparison */
 		error = gfs2_rindex_calculate(sdp, &expected_rglist,
@@ -480,9 +808,19 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count, int *sane)
 	}
 	else if (trust_lvl == distrust) { /* If we can't trust RG index */
 		error = gfs2_rindex_rebuild(sdp, &expected_rglist,
-					     &calc_rg_count);
+					    &calc_rg_count, 0);
 		if (error) {
-			log_crit( _("Error rebuilding rg list.\n"));
+			log_crit( _("Error rebuilding rgrp list.\n"));
+			gfs2_rgrp_free(&expected_rglist);
+			return -1;
+		}
+		sdp->rgrps = calc_rg_count;
+	}
+	else if (trust_lvl == indignation) { /* If we can't trust anything */
+		error = gfs2_rindex_rebuild(sdp, &expected_rglist,
+					    &calc_rg_count, 1);
+		if (error) {
+			log_crit( _("Error rebuilding rgrp list.\n"));
 			gfs2_rgrp_free(&expected_rglist);
 			return -1;
 		}
@@ -500,13 +838,24 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count, int *sane)
 	log_warn( _("L%d: number of rgs expected     = %lld.\n"), trust_lvl + 1,
 		 (unsigned long long)sdp->rgrps);
 	if (calc_rg_count != sdp->rgrps) {
-		log_warn( _("L%d: They don't match; either (1) the fs was extended, (2) an odd\n"), trust_lvl + 1);
-		log_warn( _("L%d: rg size was used, or (3) we have a corrupt rg index.\n"), trust_lvl + 1);
+		log_warn( _("L%d: They don't match; either (1) the fs was "
+			    "extended, (2) an odd\n"), trust_lvl + 1);
+		log_warn( _("L%d: rgrp size was used, or (3) we have a corrupt "
+			    "rg index.\n"), trust_lvl + 1);
 		gfs2_rgrp_free(&expected_rglist);
 		gfs2_rgrp_free(&sdp->rglist);
 		return -1;
 	}
 	/* ------------------------------------------------------------- */
+	/* Sort the rindex list.  Older versions of gfs_grow got the     */
+	/* rindex out of sorted order.  But rebuilding the rindex from   */
+	/* scratch will rebuild it in sorted order.                      */
+	/* The gfs2_grow program should, in theory, drop new rgrps into  */
+	/* the rindex in sorted order, so this should only matter for    */
+	/* gfs1 converted file systems.                                  */
+	/* ------------------------------------------------------------- */
+	sort_rgrp_list(&sdp->rglist);
+	/* ------------------------------------------------------------- */
 	/* Now compare the rindex to what we think it should be.         */
 	/* See how far off our expected values are.  If too much, abort. */
 	/* The theory is: if we calculated the index to have 32 RGs and  */
@@ -515,28 +864,52 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count, int *sane)
 	/* ------------------------------------------------------------- */
 	discrepancies = 0;
 	for (rg = 0, act = sdp->rglist.next, exp = expected_rglist.next;
-	     act != &sdp->rglist && exp != &expected_rglist;
-	     act = act->next, exp = exp->next, rg++) {
+	     act != &sdp->rglist && exp != &expected_rglist && !fsck_abort;
+	     rg++) {
 		struct rgrp_list *expected, *actual;
 
 		expected = osi_list_entry(exp, struct rgrp_list, list);
 		actual = osi_list_entry(act, struct rgrp_list, list);
-		if (!ri_equal(actual->ri, expected->ri, ri_addr) ||
-		    !ri_equal(actual->ri, expected->ri, ri_length) ||
+		if (actual->ri.ri_addr < expected->ri.ri_addr) {
+			act = act->next;
+			discrepancies++;
+			log_info(_("%d addr: 0x%llx < 0x%llx * mismatch\n"),
+				 rg + 1, actual->ri.ri_addr,
+				 expected->ri.ri_addr);
+			continue;
+		} else if (expected->ri.ri_addr < actual->ri.ri_addr) {
+			exp = exp->next;
+			discrepancies++;
+			log_info(_("%d addr: 0x%llx > 0x%llx * mismatch\n"),
+				 rg + 1, actual->ri.ri_addr,
+				 expected->ri.ri_addr);
+			continue;
+		}
+		if (!ri_equal(actual->ri, expected->ri, ri_length) ||
 		    !ri_equal(actual->ri, expected->ri, ri_data0) ||
 		    !ri_equal(actual->ri, expected->ri, ri_data) ||
 		    !ri_equal(actual->ri, expected->ri, ri_bitbytes)) {
 			discrepancies++;
+			log_info(_("%d addr: 0x%llx 0x%llx * has mismatch\n"),
+				 rg + 1, actual->ri.ri_addr,
+				 expected->ri.ri_addr);
 		}
+		act = act->next;
+		exp = exp->next;
 	}
-	if (trust_lvl < distrust && discrepancies > (trust_lvl * 8)) {
-		log_warn( _("Level %d didn't work.  Too many descepencies.\n"),
-			 trust_lvl + 1);
-		log_warn( _("%d out of %d RGs did not match what was expected.\n"),
-			 discrepancies, rg);
-		gfs2_rgrp_free(&expected_rglist);
-		gfs2_rgrp_free(&sdp->rglist);
-		return -1;
+	if (rg) {
+		/* Is the share of bad rgrps over BAD_RG_PERCENT_TOLERANCE? */
+		percent = (discrepancies * 100) / rg;
+		if (percent > BAD_RG_PERCENT_TOLERANCE) {
+			log_warn( _("Level %d didn't work.  Too many "
+				    "discrepancies.\n"), trust_lvl + 1);
+			log_warn( _("%d out of %d rgrps (%d percent) did not "
+				    "match what was expected.\n"),
+				  discrepancies, rg, percent);
+			gfs2_rgrp_free(&expected_rglist);
+			gfs2_rgrp_free(&sdp->rglist);
+			return -1;
+		}
 	}
 	/* ------------------------------------------------------------- */
 	/* Now compare the rindex to what we think it should be.         */
@@ -544,17 +917,38 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count, int *sane)
 	/* so look for index problems first before looking at the rgs.   */
 	/* ------------------------------------------------------------- */
 	for (rg = 0, act = sdp->rglist.next, exp = expected_rglist.next;
-	     act != &sdp->rglist && exp != &expected_rglist;
-	     act = act->next, exp = exp->next, rg++) {
+	     exp != &expected_rglist && !fsck_abort; rg++) {
 		struct rgrp_list *expected, *actual;
 
 		expected = osi_list_entry(exp, struct rgrp_list, list);
-		actual = osi_list_entry(act, struct rgrp_list, list);
-		ri_compare(rg, actual->ri, expected->ri, ri_addr, "llx");
-		ri_compare(rg, actual->ri, expected->ri, ri_length, PRIx32);
-		ri_compare(rg, actual->ri, expected->ri, ri_data0, "llx");
-		ri_compare(rg, actual->ri, expected->ri, ri_data, PRIx32);
-		ri_compare(rg, actual->ri, expected->ri, ri_bitbytes, PRIx32);
+
+		/* If we ran out of actual rindex entries due to rindex
+		   damage, fill in a new one with the expected values. */
+		if (act == &sdp->rglist) { /* end of actual rindex */
+			log_err( _("Entry missing from rindex: 0x%llx\n"),
+				 (unsigned long long)expected->ri.ri_addr);
+			actual = (struct rgrp_list *)
+				malloc(sizeof(struct rgrp_list));
+			if (!actual) {
+				log_err(_("Out of memory!\n"));
+				break;
+			}
+			memset(actual, 0, sizeof(struct rgrp_list));
+			osi_list_add_prev(&actual->list, &sdp->rglist);
+			rindex_modified = 1;
+		} else {
+			actual = osi_list_entry(act, struct rgrp_list, list);
+			ri_compare(rg, actual->ri, expected->ri, ri_addr,
+				   "llx");
+			ri_compare(rg, actual->ri, expected->ri, ri_length,
+				   PRIx32);
+			ri_compare(rg, actual->ri, expected->ri, ri_data0,
+				   "llx");
+			ri_compare(rg, actual->ri, expected->ri, ri_data,
+				   PRIx32);
+			ri_compare(rg, actual->ri, expected->ri, ri_bitbytes,
+				   PRIx32);
+		}
 		/* If we modified the index, write it back to disk. */
 		if (rindex_modified) {
 			if (query( _("Fix the index? (y/n)"))) {
@@ -572,22 +966,27 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count, int *sane)
 				/* Therefore, gfs2_compute_bitstructs might  */
 				/* have malloced the wrong length for bitmap */
 				/* buffers.  So we have to redo it.          */
-				if (actual->bits)
+				if (actual->bits) {
 					free(actual->bits);
+					actual->bits = NULL;
+				}
 			}
 			else
-				log_err( _("RG index not fixed.\n"));
+				log_err( _("rindex not fixed.\n"));
 			gfs2_compute_bitstructs(sdp, actual);
 			rindex_modified = FALSE;
 		}
+		exp = exp->next;
+		if (act != &sdp->rglist)
+			act = act->next;
 	}
 	/* ------------------------------------------------------------- */
 	/* Read the real RGs and check their integrity.                  */
 	/* Now we can somewhat trust the rindex and the RG addresses,    */
 	/* so let's read them in, check them and optionally fix them.    */
 	/* ------------------------------------------------------------- */
-	for (rg = 0, act = sdp->rglist.next; act != &sdp->rglist;
-	     act = act->next, rg++) {
+	for (rg = 0, act = sdp->rglist.next; act != &sdp->rglist &&
+		     !fsck_abort; act = act->next, rg++) {
 		struct rgrp_list *rgd;
 		uint64_t prev_err = 0, errblock;
 		int i;
diff --git a/gfs2/libgfs2/fs_geometry.c b/gfs2/libgfs2/fs_geometry.c
index a15c31f..7c07891 100644
--- a/gfs2/libgfs2/fs_geometry.c
+++ b/gfs2/libgfs2/fs_geometry.c
@@ -22,7 +22,7 @@
  * Returns: the number of RGs
  */
 
-static uint64_t how_many_rgrps(struct gfs2_sbd *sdp, struct device *dev, int rgsize_specified)
+uint64_t how_many_rgrps(struct gfs2_sbd *sdp, struct device *dev, int rgsize_specified)
 {
 	uint64_t nrgrp;
 	uint32_t rgblocks1, rgblocksn, bitblocks1, bitblocksn;
@@ -117,7 +117,8 @@ void compute_rgrp_layout(struct gfs2_sbd *sdp, int rgsize_specified)
 		nrgrp = rgrp + sdp->new_rgrps;
 	}
 
-	log_info("\nNew resource groups:\n");
+	if (rgrp < nrgrp)
+		log_info("\nNew resource groups:\n");
 	for (; rgrp < nrgrp; rgrp++) {
 		rl = calloc(1, sizeof(struct rgrp_list));
 		if (rl == NULL) {
diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
index 5545b52..9e97267 100644
--- a/gfs2/libgfs2/libgfs2.h
+++ b/gfs2/libgfs2/libgfs2.h
@@ -413,6 +413,8 @@ extern int gfs2_set_bitmap(struct gfs2_sbd *sdp, uint64_t blkno, int state);
 /* fs_geometry.c */
 extern void rgblocks2bitblocks(unsigned int bsize, uint32_t *rgblocks,
 			       uint32_t *bitblocks);
+extern uint64_t how_many_rgrps(struct gfs2_sbd *sdp, struct device *dev,
+			       int rgsize_specified);
 extern void compute_rgrp_layout(struct gfs2_sbd *sdp, int rgsize_specified);
 extern void build_rgrps(struct gfs2_sbd *sdp, int write);
 
diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
index d37ed1b..b35c241 100644
--- a/gfs2/libgfs2/rgrp.c
+++ b/gfs2/libgfs2/rgrp.c
@@ -132,7 +132,15 @@ struct rgrp_list *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk)
 uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_list *rgd)
 {
 	int x, length = rgd->ri.ri_length;
+	uint64_t max_rgrp_bitbytes, max_rgrp_len;
 
+	/* Max size of an rgrp is 2GB.  Figure out how many blocks that is: */
+	max_rgrp_bitbytes = ((2147483648 / sdp->bsize) / GFS2_NBBY);
+	max_rgrp_len = max_rgrp_bitbytes / sdp->bsize;
+	if (!length && length > max_rgrp_len)
+		return -1;
+	if (gfs2_check_range(sdp, rgd->ri.ri_addr))
+		return -1;
 	for (x = 0; x < length; x++){
 		rgd->bh[x] = bread(sdp, rgd->ri.ri_addr + x);
 		if(gfs2_check_meta(rgd->bh[x],
@@ -149,7 +157,8 @@ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_list *rgd)
 		}
 	}
 
-	gfs2_rgrp_in(&rgd->rg, rgd->bh[0]);
+	if (rgd->bh && rgd->bh[0])
+		gfs2_rgrp_in(&rgd->rg, rgd->bh[0]);
 	return 0;
 }
 
@@ -158,8 +167,10 @@ void gfs2_rgrp_relse(struct rgrp_list *rgd)
 	int x, length = rgd->ri.ri_length;
 
 	for (x = 0; x < length; x++) {
-		brelse(rgd->bh[x]);
-		rgd->bh[x] = NULL;
+		if (rgd->bh && rgd->bh[x]) {
+			brelse(rgd->bh[x]);
+			rgd->bh[x] = NULL;
+		}
 	}
 }
 
diff --git a/gfs2/libgfs2/super.c b/gfs2/libgfs2/super.c
index 5d232a6..f2dd171 100644
--- a/gfs2/libgfs2/super.c
+++ b/gfs2/libgfs2/super.c
@@ -180,10 +180,8 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane)
 			prev_rgd->length = prev_length;
 		}
 
-		if(gfs2_compute_bitstructs(sdp, rgd)) {
+		if(gfs2_compute_bitstructs(sdp, rgd))
 			*sane = 0;
-			return -1;
-		}
 
 		(*count1)++;
 		prev_rgd = rgd;
diff --git a/gfs2/mkfs/main_grow.c b/gfs2/mkfs/main_grow.c
index efe3bfb..3436019 100644
--- a/gfs2/mkfs/main_grow.c
+++ b/gfs2/mkfs/main_grow.c
@@ -180,7 +180,7 @@ static void initialize_new_portion(struct gfs2_sbd *sdp, int *old_rg_count)
 		osi_list_del(head->next);
 	}
 	/* Issue a discard ioctl for the new portion */
-	rl = osi_list_entry(&sdp->rglist.next, struct rgrp_list, list);
+	rl = osi_list_entry(sdp->rglist.next, struct rgrp_list, list);
 	discard_blocks(sdp->device_fd, rl->start * sdp->bsize,
 		       (sdp->device.length - rl->start) * sdp->bsize);
 	/* Build the remaining resource groups */