File 0166-Btrfsck-add-the-ability-to-prune-corrupt-extent-allo.patch of Package btrfsprogs.openSUSE_12.1_Update
From 6ee262863cbe668c340bc9b61ebce4ee77f06d8c Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Tue, 21 Feb 2012 14:37:21 -0500
Subject: [PATCH 15/18] Btrfsck: add the ability to prune corrupt extent
 allocation tree blocks
When we discover bad blocks in the extent allocation tree, repair can
now discard them and recreate the references from the rest of the trees.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 Makefile              |    2 +-
 btrfs-corrupt-block.c |   71 +++++++++++++-
 btrfsck.c             |  269 +++++++++++++++++++++++++++++++++----------------
 ctree.c               |  222 +++++++++++++++++-----------------------
 ctree.h               |   11 ++-
 repair.c              |   50 +++++++++
 repair.h              |   32 ++++++
 7 files changed, 439 insertions(+), 218 deletions(-)
 create mode 100644 repair.c
 create mode 100644 repair.h
Index: btrfs-progs-v0.19-118-gfdb6c04/Makefile
===================================================================
--- btrfs-progs-v0.19-118-gfdb6c04.orig/Makefile
+++ btrfs-progs-v0.19-118-gfdb6c04/Makefile
@@ -4,7 +4,7 @@ CFLAGS = -g -O0
 objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
 	  root-tree.o dir-item.o file-item.o inode-item.o \
 	  inode-map.o crc32c.o rbtree.o extent-cache.o extent_io.o \
-	  volumes.o utils.o btrfs-list.o btrfslabel.o
+	  volumes.o utils.o btrfs-list.o btrfslabel.o repair.o
 
 CHECKFLAGS= -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \
 	    -Wuninitialized -Wshadow -Wundef
Index: btrfs-progs-v0.19-118-gfdb6c04/btrfs-corrupt-block.c
===================================================================
--- btrfs-progs-v0.19-118-gfdb6c04.orig/btrfs-corrupt-block.c
+++ btrfs-progs-v0.19-118-gfdb6c04/btrfs-corrupt-block.c
@@ -93,6 +93,56 @@ static void print_usage(void)
 	exit(1);
 }
 
+/* Overwrite one random key in eb with the key from another random slot. */
+static void corrupt_keys(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 struct extent_buffer *eb)
+{
+	int slot;
+	int bad_slot;
+	int nr;
+	struct btrfs_disk_key bad_key;
+
+	nr = btrfs_header_nritems(eb);
+	if (nr == 0)
+		return;
+
+	slot = rand() % nr;
+	bad_slot = rand() % nr;
+	if (bad_slot == slot)
+		return;
+
+	fprintf(stderr, "corrupting keys in block %llu slot %d swapping with %d\n",
+		(unsigned long long)eb->start, slot, bad_slot);
+
+	if (btrfs_header_level(eb) == 0) {
+		btrfs_item_key(eb, &bad_key, bad_slot);
+		btrfs_set_item_key(eb, &bad_key, slot);
+	} else {
+		btrfs_node_key(eb, &bad_key, bad_slot);
+		btrfs_set_node_key(eb, &bad_key, slot);
+	}
+	btrfs_mark_buffer_dirty(eb);
+	/* no transaction: checksum and write the block out ourselves */
+	if (!trans) {
+		csum_tree_block(root, eb, 0);
+		write_extent_to_disk(eb);
+	}
+}
+
+/* Read the block at bytenr, corrupt one of its keys and write it back. */
+static int corrupt_keys_in_block(struct btrfs_root *root, u64 bytenr)
+{
+	struct extent_buffer *eb;
+
+	eb = read_tree_block(root, bytenr, root->leafsize, 0);
+	if (!eb)
+		return -EIO;
+	corrupt_keys(NULL, root, eb);
+	free_extent_buffer(eb);
+	return 0;
+}
+
 static int corrupt_extent(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, u64 bytenr, int copy)
 {
@@ -192,6 +242,11 @@ static void btrfs_corrupt_extent_tree(st
 	if (!eb)
 		return;
 
+	if ((rand() % 10) == 0) {
+		corrupt_keys(trans, root, eb);
+		return;
+	}
+
 	nr = btrfs_header_nritems(eb);
 	if (btrfs_is_leaf(eb)) {
 		btrfs_corrupt_extent_leaf(trans, root, eb);
@@ -222,6 +277,7 @@ static struct option long_options[] = {
 	{ "bytes", 1, NULL, 'b' },
 	{ "extent-record", 0, NULL, 'e' },
 	{ "extent-tree", 0, NULL, 'E' },
+	{ "keys", 0, NULL, 'k' },
 	{ 0, 0, 0, 0}
 };
 
@@ -239,12 +295,13 @@ int main(int ac, char **av)
 	u64 bytes = 4096;
 	int extent_rec = 0;
 	int extent_tree = 0;
+	int corrupt_block_keys = 0;
 
 	srand(128);
 
 	while(1) {
 		int c;
-		c = getopt_long(ac, av, "l:c:eE", long_options,
+		c = getopt_long(ac, av, "l:c:eEk", long_options,
 				&option_index);
 		if (c < 0)
 			break;
@@ -279,6 +336,9 @@ int main(int ac, char **av)
 			case 'E':
 				extent_tree = 1;
 				break;
+			case 'k':
+				corrupt_block_keys = 1;
+				break;
 			default:
 				print_usage();
 		}
@@ -324,8 +384,13 @@ int main(int ac, char **av)
 	bytes *= root->sectorsize;
 
 	while (bytes > 0) {
-		eb = debug_corrupt_block(root, logical, root->sectorsize, copy);
-		free_extent_buffer(eb);
+		if (corrupt_block_keys) {
+			corrupt_keys_in_block(root, logical);
+		} else {
+			eb = debug_corrupt_block(root, logical,
+						 root->sectorsize, copy);
+			free_extent_buffer(eb);
+		}
 		logical += root->sectorsize;
 		bytes -= root->sectorsize;
 	}
Index: btrfs-progs-v0.19-118-gfdb6c04/btrfsck.c
===================================================================
--- btrfs-progs-v0.19-118-gfdb6c04.orig/btrfsck.c
+++ btrfs-progs-v0.19-118-gfdb6c04/btrfsck.c
@@ -26,6 +26,7 @@
 #include <getopt.h>
 #include "kerncompat.h"
 #include "ctree.h"
+#include "repair.h"
 #include "disk-io.h"
 #include "print-tree.h"
 #include "transaction.h"
@@ -1718,86 +1719,6 @@ static int check_fs_roots(struct btrfs_r
 	return err;
 }
 
-static int check_node(struct btrfs_root *root,
-		      struct btrfs_disk_key *parent_key,
-		      struct extent_buffer *buf)
-{
-	int i;
-	struct btrfs_key cpukey;
-	struct btrfs_disk_key key;
-	u32 nritems = btrfs_header_nritems(buf);
-
-	if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
-		return 1;
-	if (parent_key->type) {
-		btrfs_node_key(buf, &key, 0);
-		if (memcmp(parent_key, &key, sizeof(key)))
-			return 1;
-	}
-	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
-		btrfs_node_key(buf, &key, i);
-		btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
-		if (btrfs_comp_keys(&key, &cpukey) >= 0)
-			return 1;
-	}
-	return 0;
-}
-
-static int check_leaf(struct btrfs_root *root,
-		      struct btrfs_disk_key *parent_key,
-		      struct extent_buffer *buf)
-{
-	int i;
-	struct btrfs_key cpukey;
-	struct btrfs_disk_key key;
-	u32 nritems = btrfs_header_nritems(buf);
-
-	if (btrfs_header_level(buf) != 0) {
-		fprintf(stderr, "leaf is not a leaf %llu\n",
-		       (unsigned long long)btrfs_header_bytenr(buf));
-		return 1;
-	}
-	if (btrfs_leaf_free_space(root, buf) < 0) {
-		fprintf(stderr, "leaf free space incorrect %llu %d\n",
-			(unsigned long long)btrfs_header_bytenr(buf),
-			btrfs_leaf_free_space(root, buf));
-		return 1;
-	}
-
-	if (nritems == 0)
-		return 0;
-
-	btrfs_item_key(buf, &key, 0);
-	if (parent_key->type && memcmp(parent_key, &key, sizeof(key))) {
-		fprintf(stderr, "leaf parent key incorrect %llu\n",
-		       (unsigned long long)btrfs_header_bytenr(buf));
-		return 1;
-	}
-	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
-		btrfs_item_key(buf, &key, i);
-		btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
-		if (btrfs_comp_keys(&key, &cpukey) >= 0) {
-			fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
-			return 1;
-		}
-		if (btrfs_item_offset_nr(buf, i) !=
-			btrfs_item_end_nr(buf, i + 1)) {
-			fprintf(stderr, "incorrect offsets %u %u\n",
-				btrfs_item_offset_nr(buf, i),
-				btrfs_item_end_nr(buf, i + 1));
-			return 1;
-		}
-		if (i == 0 && btrfs_item_end_nr(buf, i) !=
-		    BTRFS_LEAF_DATA_SIZE(root)) {
-			fprintf(stderr, "bad item end %u wanted %u\n",
-				btrfs_item_end_nr(buf, i),
-				(unsigned)BTRFS_LEAF_DATA_SIZE(root));
-			return 1;
-		}
-	}
-	return 0;
-}
-
 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 {
 	struct list_head *cur = rec->backrefs.next;
@@ -1954,7 +1875,7 @@ static int check_owner_ref(struct btrfs_
 		btrfs_item_key_to_cpu(buf, &key, 0);
 	else
 		btrfs_node_key_to_cpu(buf, &key, 0);
-	
+
 	btrfs_init_path(&path);
 	path.lowest_level = level + 1;
 	btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
@@ -1967,6 +1888,48 @@ static int check_owner_ref(struct btrfs_
 	return found ? 0 : 1;
 }
 
+/* rec is a tree block (no data/full backrefs) referenced by the extent root */
+static int is_extent_tree_record(struct extent_record *rec)
+{
+	struct list_head *cur = rec->backrefs.next;
+	struct extent_backref *node;
+	struct tree_backref *back;
+	int is_extent = 0;
+
+	while(cur != &rec->backrefs) {
+		node = list_entry(cur, struct extent_backref, list);
+		cur = cur->next;
+		if (node->is_data)
+			return 0;
+		back = (struct tree_backref *)node;
+		if (node->full_backref)
+			return 0;
+		if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
+			is_extent = 1;
+	}
+	return is_extent;
+}
+
+static int record_bad_block_io(struct btrfs_fs_info *info,
+			       struct cache_tree *extent_cache,
+			       u64 start, u64 len)
+{
+	struct extent_record *rec;
+	struct cache_extent *cache;
+	struct btrfs_key key;
+
+	cache = find_cache_extent(extent_cache, start, len);
+	if (!cache)
+		return 0;
+
+	rec = container_of(cache, struct extent_record, cache);
+	if (!is_extent_tree_record(rec))
+		return 0;
+
+	btrfs_disk_key_to_cpu(&key, &rec->parent_key);
+	return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
+}
+
 static int check_block(struct btrfs_root *root,
 		       struct cache_tree *extent_cache,
 		       struct extent_buffer *buf, u64 flags)
@@ -1995,11 +1958,11 @@ static int check_block(struct btrfs_root
 	}
 	rec->info_level = level;
 
-	if (btrfs_is_leaf(buf)) {
-		ret = check_leaf(root, &rec->parent_key, buf);
-	} else {
-		ret = check_node(root, &rec->parent_key, buf);
-	}
+	if (btrfs_is_leaf(buf))
+		ret = btrfs_check_leaf(root, &rec->parent_key, buf);
+	else
+		ret = btrfs_check_node(root, &rec->parent_key, buf);
+
 	if (ret) {
 		fprintf(stderr, "bad block %llu\n",
 			(unsigned long long)buf->start);
@@ -2550,6 +2513,13 @@ static int run_next_block(struct btrfs_r
 
 	/* fixme, get the real parent transid */
 	buf = read_tree_block(root, bytenr, size, 0);
+	if (!extent_buffer_uptodate(buf)) {
+		record_bad_block_io(root->fs_info,
+				    extent_cache, bytenr, size);
+		/* buf is dropped exactly once, at the out: label below */
+		goto out;
+	}
+
 	nritems = btrfs_header_nritems(buf);
 
 	ret = btrfs_lookup_extent_info(NULL, root, bytenr, size, NULL, &flags);
@@ -2565,6 +2535,8 @@ static int run_next_block(struct btrfs_r
 	}
 
 	ret = check_block(root, extent_cache, buf, flags);
+	if (ret)
+		goto out;
 
 	if (btrfs_is_leaf(buf)) {
 		btree_space_waste += btrfs_leaf_free_space(root, buf);
@@ -2691,6 +2663,7 @@ static int run_next_block(struct btrfs_r
 	    btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
 		found_old_backref = 1;
+out:
 	free_extent_buffer(buf);
 	return 0;
 }
@@ -3016,6 +2989,7 @@ static int fixup_extent_refs(struct btrf
 	int ret;
 	struct btrfs_path *path;
 	struct list_head *cur = rec->backrefs.next;
+	struct cache_extent *cache;
 	struct extent_backref *back;
 	int allocated = 0;
 	u64 flags = 0;
@@ -3035,6 +3009,13 @@ static int fixup_extent_refs(struct btrf
 	if (ret < 0)
 		goto out;
 
+	/* was this block corrupt?  If so, don't add references to it */
+	cache = find_cache_extent(info->corrupt_blocks, rec->start, rec->max_size);
+	if (cache) {
+		ret = 0;
+		goto out;
+	}
+
 	/* step two, recreate all the refs we did find */
 	while(cur != &rec->backrefs) {
 		back = list_entry(cur, struct extent_backref, list);
@@ -3058,6 +3039,107 @@ out:
 	return ret;
 }
 
+/* right now we only prune from the extent allocation tree */
+static int prune_one_block(struct btrfs_trans_handle *trans,
+			   struct btrfs_fs_info *info,
+			   struct btrfs_corrupt_block *corrupt)
+{
+	int ret;
+	struct btrfs_path path;
+	struct extent_buffer *eb;
+	u64 found;
+	int slot;
+	int nritems;
+	int level = corrupt->level + 1;
+
+	/* level came from a damaged header, don't index nodes[] with junk */
+	if (level >= BTRFS_MAX_LEVEL)
+		return -EINVAL;
+	btrfs_init_path(&path);
+again:
+	/* we want to stop at the parent to our busted block */
+	path.lowest_level = level;
+
+	ret = btrfs_search_slot(trans, info->extent_root,
+				&corrupt->key, &path, -1, 1);
+	if (ret < 0)
+		goto out;
+
+	eb = path.nodes[level];
+	if (!eb) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	/*
+	 * hopefully the search gave us the block we want to prune,
+	 * let's try that first
+	 */
+	slot = path.slots[level];
+	found = btrfs_node_blockptr(eb, slot);
+	if (found == corrupt->cache.start)
+		goto del_ptr;
+
+	/* the search failed, let's scan this node and hope we find it */
+	nritems = btrfs_header_nritems(eb);
+	for (slot = 0; slot < nritems; slot++) {
+		found = btrfs_node_blockptr(eb, slot);
+		if (found == corrupt->cache.start)
+			goto del_ptr;
+	}
+	/*
+	 * we couldn't find the bad block.  TODO, search all the nodes for pointers
+	 * to this block
+	 */
+	if (eb == info->extent_root->node) {
+		ret = -ENOENT;
+		goto out;
+	}
+	level++;
+	btrfs_release_path(NULL, &path);
+	goto again;
+
+del_ptr:
+	printk("deleting pointer to block %Lu\n", corrupt->cache.start);
+	ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
+
+out:
+	btrfs_release_path(NULL, &path);
+	return ret;
+}
+
+static int prune_corrupt_blocks(struct btrfs_trans_handle *trans,
+				struct btrfs_fs_info *info)
+{
+	struct cache_extent *cache;
+	struct btrfs_corrupt_block *corrupt;
+
+	cache = find_first_cache_extent(info->corrupt_blocks, 0);
+	while (1) {
+		if (!cache)
+			break;
+		corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
+		prune_one_block(trans, info, corrupt);
+		cache = next_cache_extent(cache);
+	}
+	return 0;
+}
+
+static void free_corrupt_blocks(struct btrfs_fs_info *info)
+{
+	struct cache_extent *cache;
+	struct btrfs_corrupt_block *corrupt;
+
+	while (1) {
+		cache = find_first_cache_extent(info->corrupt_blocks, 0);
+		if (!cache)
+			break;
+		corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
+		remove_cache_extent(info->corrupt_blocks, cache);
+		free(corrupt);
+	}
+}
+
 static int check_extent_refs(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct cache_tree *extent_cache, int repair)
@@ -3082,6 +3164,16 @@ static int check_extent_refs(struct btrf
 					 rec->start, rec->max_size);
 			cache = next_cache_extent(cache);
 		}
+
+		/* pin down all the corrupted blocks too */
+		cache = find_first_cache_extent(root->fs_info->corrupt_blocks, 0);
+		while(cache) {
+			/* these entries are btrfs_corrupt_block, not extent_record */
+			btrfs_pin_extent(root->fs_info,
+					 cache->start, cache->size);
+			cache = next_cache_extent(cache);
+		}
+		prune_corrupt_blocks(trans, root->fs_info);
 	}
 	while(1) {
 		fixed = 0;
@@ -3159,6 +3251,7 @@ static int check_extents(struct btrfs_tr
 	struct cache_tree pending;
 	struct cache_tree reada;
 	struct cache_tree nodes;
+	struct cache_tree corrupt_blocks;
 	struct btrfs_path path;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
@@ -3175,10 +3268,12 @@ static int check_extents(struct btrfs_tr
 	cache_tree_init(&pending);
 	cache_tree_init(&nodes);
 	cache_tree_init(&reada);
+	cache_tree_init(&corrupt_blocks);
 
 	if (repair) {
 		root->fs_info->fsck_extent_cache = &extent_cache;
 		root->fs_info->free_extent_hook = free_extent_hook;
+		root->fs_info->corrupt_blocks = &corrupt_blocks;
 	}
 
 	bits_nr = 1024;
@@ -3241,8 +3336,10 @@ static int check_extents(struct btrfs_tr
 	ret = check_extent_refs(trans, root, &extent_cache, repair);
 
 	if (repair) {
+		free_corrupt_blocks(root->fs_info);
 		root->fs_info->fsck_extent_cache = NULL;
 		root->fs_info->free_extent_hook = NULL;
+		root->fs_info->corrupt_blocks = NULL;
 	}
 
 	free(bits);
Index: btrfs-progs-v0.19-118-gfdb6c04/ctree.c
===================================================================
--- btrfs-progs-v0.19-118-gfdb6c04.orig/ctree.c
+++ btrfs-progs-v0.19-118-gfdb6c04/ctree.c
@@ -19,6 +19,7 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "print-tree.h"
+#include "repair.h"
 
 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root, struct btrfs_path *path, int level);
@@ -32,8 +33,6 @@ static int balance_node_right(struct btr
 			      struct btrfs_root *root,
 			      struct extent_buffer *dst_buf,
 			      struct extent_buffer *src_buf);
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		   struct btrfs_path *path, int level, int slot);
 
 inline void btrfs_init_path(struct btrfs_path *p)
 {
@@ -589,156 +588,125 @@ static inline unsigned int leaf_data_end
 	return btrfs_item_offset_nr(leaf, nr - 1);
 }
 
-static int check_node(struct btrfs_root *root, struct btrfs_path *path,
-		      int level)
+int btrfs_check_node(struct btrfs_root *root,
+		      struct btrfs_disk_key *parent_key,
+		      struct extent_buffer *buf)
 {
-	struct extent_buffer *parent = NULL;
-	struct extent_buffer *node = path->nodes[level];
-	struct btrfs_disk_key parent_key;
-	struct btrfs_disk_key node_key;
-	int parent_slot;
-	int slot;
+	int i;
 	struct btrfs_key cpukey;
-	u32 nritems = btrfs_header_nritems(node);
+	struct btrfs_disk_key key;
+	u32 nritems = btrfs_header_nritems(buf);
 
-	if (path->nodes[level + 1])
-		parent = path->nodes[level + 1];
+	if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
+		goto fail;
 
-	slot = path->slots[level];
-	BUG_ON(nritems == 0);
-	if (parent) {
-		parent_slot = path->slots[level + 1];
-		btrfs_node_key(parent, &parent_key, parent_slot);
-		btrfs_node_key(node, &node_key, 0);
-		BUG_ON(memcmp(&parent_key, &node_key,
-			      sizeof(struct btrfs_disk_key)));
-		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-		       btrfs_header_bytenr(node));
-	}
-	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
-	if (slot != 0) {
-		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
-		btrfs_node_key(node, &node_key, slot);
-		BUG_ON(btrfs_comp_keys(&node_key, &cpukey) <= 0);
-	}
-	if (slot < nritems - 1) {
-		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
-		btrfs_node_key(node, &node_key, slot);
-		BUG_ON(btrfs_comp_keys(&node_key, &cpukey) >= 0);
+	if (parent_key && parent_key->type) {
+		btrfs_node_key(buf, &key, 0);
+		if (memcmp(parent_key, &key, sizeof(key)))
+			goto fail;
+	}
+	for (i = 0; nritems > 1 && i < nritems - 1; i++) {
+		btrfs_node_key(buf, &key, i);
+		btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
+		if (btrfs_comp_keys(&key, &cpukey) >= 0)
+			goto fail;
 	}
 	return 0;
+fail:
+	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
+		if (parent_key)
+			btrfs_disk_key_to_cpu(&cpukey, parent_key);
+		else
+			btrfs_node_key_to_cpu(buf, &cpukey, 0);
+		btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
+						buf->start, buf->len,
+						btrfs_header_level(buf));
+	}
+	return -EIO;
 }
 
-static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
-		      int level)
+int btrfs_check_leaf(struct btrfs_root *root,
+		      struct btrfs_disk_key *parent_key,
+		      struct extent_buffer *buf)
 {
-	struct extent_buffer *leaf = path->nodes[level];
-	struct extent_buffer *parent = NULL;
-	int parent_slot;
+	int i;
 	struct btrfs_key cpukey;
-	struct btrfs_disk_key parent_key;
-	struct btrfs_disk_key leaf_key;
-	int slot = path->slots[0];
-
-	u32 nritems = btrfs_header_nritems(leaf);
+	struct btrfs_disk_key key;
+	u32 nritems = btrfs_header_nritems(buf);
 
-	if (path->nodes[level + 1])
-		parent = path->nodes[level + 1];
+	if (btrfs_header_level(buf) != 0) {
+		fprintf(stderr, "leaf is not a leaf %llu\n",
+		       (unsigned long long)btrfs_header_bytenr(buf));
+		goto fail;
+	}
+	if (btrfs_leaf_free_space(root, buf) < 0) {
+		fprintf(stderr, "leaf free space incorrect %llu %d\n",
+			(unsigned long long)btrfs_header_bytenr(buf),
+			btrfs_leaf_free_space(root, buf));
+		goto fail;
+	}
 
 	if (nritems == 0)
 		return 0;
 
-	if (parent) {
-		parent_slot = path->slots[level + 1];
-		btrfs_node_key(parent, &parent_key, parent_slot);
-		btrfs_item_key(leaf, &leaf_key, 0);
-
-		BUG_ON(memcmp(&parent_key, &leaf_key,
-		       sizeof(struct btrfs_disk_key)));
-		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-		       btrfs_header_bytenr(leaf));
+	btrfs_item_key(buf, &key, 0);
+	if (parent_key && parent_key->type &&
+	    memcmp(parent_key, &key, sizeof(key))) {
+		fprintf(stderr, "leaf parent key incorrect %llu\n",
+		       (unsigned long long)btrfs_header_bytenr(buf));
+		goto fail;
 	}
-#if 0
 	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
-		btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
-		btrfs_item_key(leaf, &leaf_key, i);
-		if (comp_keys(&leaf_key, &cpukey) >= 0) {
-			btrfs_print_leaf(root, leaf);
-			printk("slot %d offset bad key\n", i);
-			BUG_ON(1);
-		}
-		if (btrfs_item_offset_nr(leaf, i) !=
-			btrfs_item_end_nr(leaf, i + 1)) {
-			btrfs_print_leaf(root, leaf);
-			printk("slot %d offset bad\n", i);
-			BUG_ON(1);
-		}
-		if (i == 0) {
-			if (btrfs_item_offset_nr(leaf, i) +
-			       btrfs_item_size_nr(leaf, i) !=
-			       BTRFS_LEAF_DATA_SIZE(root)) {
-				btrfs_print_leaf(root, leaf);
-				printk("slot %d first offset bad\n", i);
-				BUG_ON(1);
-			}
+		btrfs_item_key(buf, &key, i);
+		btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
+		if (btrfs_comp_keys(&key, &cpukey) >= 0) {
+			fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
+			goto fail;
+		}
+		if (btrfs_item_offset_nr(buf, i) !=
+			btrfs_item_end_nr(buf, i + 1)) {
+			fprintf(stderr, "incorrect offsets %u %u\n",
+				btrfs_item_offset_nr(buf, i),
+				btrfs_item_end_nr(buf, i + 1));
+			goto fail;
+		}
+		if (i == 0 && btrfs_item_end_nr(buf, i) !=
+		    BTRFS_LEAF_DATA_SIZE(root)) {
+			fprintf(stderr, "bad item end %u wanted %u\n",
+				btrfs_item_end_nr(buf, i),
+				(unsigned)BTRFS_LEAF_DATA_SIZE(root));
+			goto fail;
 		}
 	}
-	if (nritems > 0) {
-		if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
-				btrfs_print_leaf(root, leaf);
-				printk("slot %d bad size \n", nritems - 1);
-				BUG_ON(1);
-		}
-	}
-#endif
-	if (slot != 0 && slot < nritems - 1) {
-		btrfs_item_key(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
-		if (btrfs_comp_keys(&leaf_key, &cpukey) <= 0) {
-			btrfs_print_leaf(root, leaf);
-			printk("slot %d offset bad key\n", slot);
-			BUG_ON(1);
-		}
-		if (btrfs_item_offset_nr(leaf, slot - 1) !=
-		       btrfs_item_end_nr(leaf, slot)) {
-			btrfs_print_leaf(root, leaf);
-			printk("slot %d offset bad\n", slot);
-			BUG_ON(1);
-		}
-	}
-	if (slot < nritems - 1) {
-		btrfs_item_key(leaf, &leaf_key, slot);
-		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
-		BUG_ON(btrfs_comp_keys(&leaf_key, &cpukey) >= 0);
-		if (btrfs_item_offset_nr(leaf, slot) !=
-			btrfs_item_end_nr(leaf, slot + 1)) {
-			btrfs_print_leaf(root, leaf);
-			printk("slot %d offset bad\n", slot);
-			BUG_ON(1);
-		}
-	}
-	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
-	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
 	return 0;
+fail:
+	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) {
+		if (parent_key)
+			btrfs_disk_key_to_cpu(&cpukey, parent_key);
+		else
+			btrfs_item_key_to_cpu(buf, &cpukey, 0);
+
+		btrfs_add_corrupt_extent_record(root->fs_info, &cpukey,
+						buf->start, buf->len, 0);
+	}
+	return -EIO;
 }
 
 static int noinline check_block(struct btrfs_root *root,
 				struct btrfs_path *path, int level)
 {
-	return 0;
-#if 0
-	struct extent_buffer *buf = path->nodes[level];
+	struct btrfs_disk_key key;
+	struct btrfs_disk_key *key_ptr = NULL;
+	struct extent_buffer *parent;
 
-	if (memcmp_extent_buffer(buf, root->fs_info->fsid,
-				 (unsigned long)btrfs_header_fsid(buf),
-				 BTRFS_FSID_SIZE)) {
-		printk("warning bad block %Lu\n", buf->start);
-		return 1;
+	if (path->nodes[level + 1]) {
+		parent = path->nodes[level + 1];
+		btrfs_node_key(parent, &key, path->slots[level + 1]);
+		key_ptr = &key;
 	}
-#endif
 	if (level == 0)
-		return check_leaf(root, path, level);
-	return check_node(root, path, level);
+		return btrfs_check_leaf(root, key_ptr, path->nodes[0]);
+	return btrfs_check_node(root, key_ptr, path->nodes[level]);
 }
 
 /*
@@ -924,8 +892,8 @@ static int balance_level(struct btrfs_tr
 			wait_on_tree_block_writeback(root, right);
 			free_extent_buffer(right);
 			right = NULL;
-			wret = del_ptr(trans, root, path, level + 1, pslot +
-				       1);
+			wret = btrfs_del_ptr(trans, root, path,
+					     level + 1, pslot + 1);
 			if (wret)
 				ret = wret;
 			wret = btrfs_free_extent(trans, root, bytenr,
@@ -972,7 +940,7 @@ static int balance_level(struct btrfs_tr
 		wait_on_tree_block_writeback(root, mid);
 		free_extent_buffer(mid);
 		mid = NULL;
-		wret = del_ptr(trans, root, path, level + 1, pslot);
+		wret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
 		if (wret)
 			ret = wret;
 		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
@@ -2699,7 +2667,7 @@ int btrfs_insert_item(struct btrfs_trans
  * continuing all the way the root if required.  The root is converted into
  * a leaf if all the nodes are emptied.
  */
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int level, int slot)
 {
 	struct extent_buffer *parent = path->nodes[level];
@@ -2751,7 +2719,7 @@ static noinline int btrfs_del_leaf(struc
 	int ret;
 
 	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
-	ret = del_ptr(trans, root, path, 1, path->slots[1]);
+	ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
 	if (ret)
 		return ret;
 
Index: btrfs-progs-v0.19-118-gfdb6c04/ctree.h
===================================================================
--- btrfs-progs-v0.19-118-gfdb6c04.orig/ctree.h
+++ btrfs-progs-v0.19-118-gfdb6c04/ctree.h
@@ -802,7 +802,8 @@ struct btrfs_fs_info {
 				u64 bytenr, u64 num_bytes, u64 parent,
 				u64 root_objectid, u64 owner, u64 offset,
 				int refs_to_drop);
-	struct cache_tree * fsck_extent_cache;
+	struct cache_tree *fsck_extent_cache;
+	struct cache_tree *corrupt_blocks;
 };
 
 /*
@@ -1857,6 +1858,14 @@ int btrfs_update_block_group(struct btrf
 			     struct btrfs_root *root, u64 bytenr, u64 num,
 			     int alloc, int mark_free);
 /* ctree.c */
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct btrfs_path *path, int level, int slot);
+int btrfs_check_node(struct btrfs_root *root,
+		      struct btrfs_disk_key *parent_key,
+		      struct extent_buffer *buf);
+int btrfs_check_leaf(struct btrfs_root *root,
+		      struct btrfs_disk_key *parent_key,
+		      struct extent_buffer *buf);
 int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root);
 void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
diff --git a/repair.c b/repair.c
new file mode 100644
index 0000000..e640465
--- /dev/null
+++ b/repair.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2012 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "ctree.h"
+#include "extent-cache.h"
+#include "utils.h"
+#include "repair.h"
+
+/* Add a corrupt block to info->corrupt_blocks; does nothing if it is NULL. */
+int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info,
+				    struct btrfs_key *first_key,
+				    u64 start, u64 len, int level)
+{
+	int ret = 0;
+	struct btrfs_corrupt_block *corrupt;
+
+	if (!info->corrupt_blocks)
+		return 0;
+
+	corrupt = malloc(sizeof(*corrupt));
+	if (!corrupt)
+		return -ENOMEM;
+
+	memcpy(&corrupt->key, first_key, sizeof(*first_key));
+	corrupt->cache.start = start;
+	corrupt->cache.size = len;
+	corrupt->level = level;
+
+	ret = insert_existing_cache_extent(info->corrupt_blocks, &corrupt->cache);
+	if (ret)
+		free(corrupt);
+	BUG_ON(ret && ret != -EEXIST);
+	return ret;
+}
+
diff --git a/repair.h b/repair.h
new file mode 100644
index 0000000..3d0dcb9
--- /dev/null
+++ b/repair.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2012 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_REPAIR__
+#define __BTRFS_REPAIR__
+
+struct btrfs_corrupt_block {
+	struct cache_extent cache;
+	struct btrfs_key key;
+	int level;
+};
+
+int btrfs_add_corrupt_extent_record(struct btrfs_fs_info *info,
+				    struct btrfs_key *first_key,
+				    u64 start, u64 len, int level);
+
+#endif
-- 
1.7.6.233.gd79bc