File bug-1150021_02-suse-special-bcache-bug-fix.patch of Package lvm2.13753

The bcache issues (see: 2938b4dcca0 & 6b0d969b2a) had been fixed in the
end of Oct (2019).  But the stable-2.02 still unfix. I made a patch from
master branch to stable-2.02, and the patch had been passed in unit
test.

For the patch code, there is one ugly function bcache_abort_fd(), the
stable-2.02 doesn't have easy way to search block. So I wrote 4
iterators loop. It will cost O(n) on every execution.

How the bug happen:
When bcache writes data error, the errored fd and its data is saved in
cache->errored, then this fd is closed. Later lvm will reuse this
closed fd to new opened devs, but the fd related data still in
cache->errored and flags with BF_DIRTY. It makes the data may mistakenly
write to another disk.

Related master branch commits:
 2938b4dcca0a1df661758abfab7f402ea7aab018
 6b0d969b2a85ba69046afa26af4d7bcddddbccd5
 5fdebf9bbf68a53b9d2153dbd8bf27a36e0ef3cd
 25e7bf021a4e7c5ad5f925b86605bf025ff1a949

In stable-2.02 branch, bcache uses hash-table to manage blocks, but
master branch had been switched to radix tree. This commit changed
related (radix tree) functions to hash-table style.

Signed-off-by: Zhao Heming <heming.zhao@suse.com>
---
 lib/device/bcache.c  | 61 +++++++++++++++++++++++++++--
 lib/device/bcache.h  |  7 ++++
 lib/label/label.c    | 32 +++++++++------
 test/unit/bcache_t.c | 92 +++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 174 insertions(+), 18 deletions(-)

diff -Nupr a/lib/device/bcache.c b/lib/device/bcache.c
--- a/lib/device/bcache.c	2019-12-23 14:37:52.179166465 +0800
+++ b/lib/device/bcache.c	2019-12-23 14:40:11.803782520 +0800
@@ -1165,7 +1165,7 @@ static bool _invalidate_block(struct bca
 		_wait_specific(b);
 
 		if (b->error)
-        		return false;
+			return false;
 	}
 
 	_recycle_block(cache, b);
@@ -1186,18 +1186,71 @@ bool bcache_invalidate_fd(struct bcache
 	bool r = true;
 
 	// Start writing back any dirty blocks on this fd.
-	dm_list_iterate_items_safe (b, tmp, &cache->dirty)
+	dm_list_iterate_items_safe(b, tmp, &cache->dirty)
 		if (b->fd == fd)
 			_issue_write(b);
 
 	_wait_all(cache);
 
 	// Everything should be in the clean list now.
-	dm_list_iterate_items_safe (b, tmp, &cache->clean)
+	dm_list_iterate_items_safe(b, tmp, &cache->clean)
 		if (b->fd == fd)
 			r = _invalidate_block(cache, b) && r;
 
-       return r;
+	if (r)
+		_hash_remove(b);
+
+	return r;
+}
+
+//----------------------------------------------------------------
+
+static bool _abort_v(struct block *b)
+{
+	if (b->ref_count) {
+		log_fatal("bcache_abort: block (%d, %llu) still held",
+					b->fd, (unsigned long long) b->index);
+		return true;
+	}
+
+	_unlink_block(b);
+	dm_list_add(&b->cache->free, &b->list);
+
+	// We can't remove the block from the radix tree yet because
+	// we're in the middle of an iteration.
+	return true;
+}
+
+/* This function searches all impossible list to find out fd. */
+void bcache_abort_fd(struct bcache *cache, int fd)
+{
+	struct block *b, *tmp;
+
+	dm_list_iterate_items_safe(b, tmp, &cache->errored)
+		if (b->fd == fd) {
+			_abort_v(b);
+			_hash_remove(b);
+		}
+
+	dm_list_iterate_items_safe(b, tmp, &cache->dirty)
+		if (b->fd == fd) {
+			_abort_v(b);
+			_hash_remove(b);
+		}
+
+	dm_list_iterate_items_safe(b, tmp, &cache->io_pending)
+		if (b->fd == fd) {
+			_abort_v(b);
+			_hash_remove(b);
+		}
+
+	dm_list_iterate_items_safe(b, tmp, &cache->clean)
+		if (b->fd == fd) {
+			_abort_v(b);
+			_hash_remove(b);
+		}
+
+	return;
 }
 
 //----------------------------------------------------------------
diff -Nupr a/lib/device/bcache.h b/lib/device/bcache.h
--- a/lib/device/bcache.h	2019-12-23 14:37:52.179166465 +0800
+++ b/lib/device/bcache.h	2019-12-23 14:40:15.123797169 +0800
@@ -145,6 +145,13 @@ bool bcache_invalidate(struct bcache *ca
  */
 bool bcache_invalidate_fd(struct bcache *cache, int fd);
 
+/*
+ * Call this function if flush, or invalidate fail and you do not
+ * wish to retry the writes.  This will throw away any dirty data
+ * not written.  If any blocks for fd are held, then it will call
+ * abort().
+ */
+void bcache_abort_fd(struct bcache *cache, int fd);
 
 //----------------------------------------------------------------
 // The next four functions are utilities written in terms of the above api.
diff -Nupr a/lib/label/label.c b/lib/label/label.c
--- a/lib/label/label.c	2019-12-23 14:38:05.131223614 +0800
+++ b/lib/label/label.c	2019-12-23 14:40:27.739852834 +0800
@@ -592,6 +592,14 @@ static void _drop_bad_aliases(struct dev
 	}
 }
 
+// Like bcache_invalidate, only it throws any dirty data away if the
+// write fails.
+static void _invalidate_fd(struct bcache *cache, int fd)
+{
+	if (!bcache_invalidate_fd(cache, fd))
+		bcache_abort_fd(cache, fd);
+}
+
 /*
  * Read or reread label/metadata from selected devs.
  *
@@ -702,7 +710,7 @@ static int _scan_list(struct cmd_context
 		 * drop it from bcache.
 		 */
 		if (scan_failed || !is_lvm_device) {
-			bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
+			_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
 			_scan_dev_close(devl->dev);
 		}
 
@@ -859,7 +867,7 @@ int label_scan(struct cmd_context *cmd)
 		 * so this will usually not be true.
 		 */
 		if (_in_bcache(dev)) {
-			bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+			_invalidate_fd(scan_bcache, dev->bcache_fd);
 			_scan_dev_close(dev);
 		}
 
@@ -943,7 +951,7 @@ int label_scan_pvscan_all(struct cmd_con
 		 * so this will usually not be true.
 		 */
 		if (_in_bcache(dev)) {
-			bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+			_invalidate_fd(scan_bcache, dev->bcache_fd);
 			_scan_dev_close(dev);
 		}
 
@@ -1006,7 +1014,7 @@ int label_scan_devs(struct cmd_context *
 
 	dm_list_iterate_items(devl, devs) {
 		if (_in_bcache(devl->dev)) {
-			bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
+			_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
 			_scan_dev_close(devl->dev);
 		}
 	}
@@ -1025,7 +1033,7 @@ int label_scan_devs_rw(struct cmd_contex
 
 	dm_list_iterate_items(devl, devs) {
 		if (_in_bcache(devl->dev)) {
-			bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
+			_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
 			_scan_dev_close(devl->dev);
 		}
 
@@ -1047,7 +1055,7 @@ int label_scan_devs_excl(struct dm_list
 
 	dm_list_iterate_items(devl, devs) {
 		if (_in_bcache(devl->dev)) {
-			bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
+			_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
 			_scan_dev_close(devl->dev);
 		}
 		/*
@@ -1067,7 +1075,7 @@ int label_scan_devs_excl(struct dm_list
 void label_scan_invalidate(struct device *dev)
 {
 	if (_in_bcache(dev)) {
-		bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+		_invalidate_fd(scan_bcache, dev->bcache_fd);
 		_scan_dev_close(dev);
 	}
 }
@@ -1147,7 +1155,7 @@ int label_read(struct device *dev)
 	dm_list_add(&one_dev, &devl->list);
 
 	if (_in_bcache(dev)) {
-		bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+		_invalidate_fd(scan_bcache, dev->bcache_fd);
 		_scan_dev_close(dev);
 	}
 
@@ -1253,7 +1261,7 @@ int label_scan_open_excl(struct device *
 	if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_EXCL)) {
 		/* FIXME: avoid tossing out bcache blocks just to replace fd. */
 		log_debug("Close and reopen excl %s", dev_name(dev));
-		bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+		_invalidate_fd(scan_bcache, dev->bcache_fd);
 		_scan_dev_close(dev);
 	}
 	dev->flags |= DEV_BCACHE_EXCL;
@@ -1302,7 +1310,7 @@ bool dev_write_bytes(struct device *dev,
 	if (!(dev->flags & DEV_BCACHE_WRITE)) {
 		/* FIXME: avoid tossing out bcache blocks just to replace fd. */
 		log_debug("Close and reopen to write %s", dev_name(dev));
-		bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+		_invalidate_fd(scan_bcache, dev->bcache_fd);
 		_scan_dev_close(dev);
 
 		dev->flags |= DEV_BCACHE_WRITE;
@@ -1350,7 +1358,7 @@ bool dev_write_zeros(struct device *dev,
 	if (!(dev->flags & DEV_BCACHE_WRITE)) {
 		/* FIXME: avoid tossing out bcache blocks just to replace fd. */
 		log_debug("Close and reopen to write %s", dev_name(dev));
-		bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+		_invalidate_fd(scan_bcache, dev->bcache_fd);
 		_scan_dev_close(dev);
 
 		dev->flags |= DEV_BCACHE_WRITE;
@@ -1401,7 +1409,7 @@ bool dev_set_bytes(struct device *dev, u
 	if (!(dev->flags & DEV_BCACHE_WRITE)) {
 		/* FIXME: avoid tossing out bcache blocks just to replace fd. */
 		log_debug("Close and reopen to write %s", dev_name(dev));
-		bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+		_invalidate_fd(scan_bcache, dev->bcache_fd);
 		_scan_dev_close(dev);
 
 		dev->flags |= DEV_BCACHE_WRITE;
diff -Nupr a/test/unit/bcache_t.c b/test/unit/bcache_t.c
--- a/test/unit/bcache_t.c	2019-12-23 14:38:19.691287854 +0800
+++ b/test/unit/bcache_t.c	2019-12-23 14:40:45.459931020 +0800
@@ -794,7 +794,6 @@ static void test_invalidate_after_write_
 
 static void test_invalidate_held_block(void *context)
 {
-
 	struct fixture *f = context;
 	struct mock_engine *me = f->me;
 	struct bcache *cache = f->cache;
@@ -811,6 +810,89 @@ static void test_invalidate_held_block(v
 }
 
 //----------------------------------------------------------------
+// abort tests
+static void test_abort_no_blocks(void *context)
+{
+	struct fixture *f = context;
+	struct bcache *cache = f->cache;
+	int fd = 17;
+
+	// We have no expectations
+	bcache_abort_fd(cache, fd);
+}
+
+static void test_abort_single_block(void *context)
+{
+	struct fixture *f = context;
+	struct bcache *cache = f->cache;
+	struct block *b;
+	int fd = 17;
+
+	T_ASSERT(bcache_get(cache, fd, 0, GF_ZERO, &b));
+	bcache_put(b);
+
+	bcache_abort_fd(cache, fd);
+
+	// no write should be issued
+	T_ASSERT(bcache_flush(cache));
+}
+
+static void test_abort_forces_reread(void *context)
+{
+	struct fixture *f = context;
+	struct mock_engine *me = f->me;
+	struct bcache *cache = f->cache;
+	struct block *b;
+	int fd = 17;
+
+	_expect_read(me, fd, 0);
+	_expect(me, E_WAIT);
+	T_ASSERT(bcache_get(cache, fd, 0, GF_DIRTY, &b));
+	bcache_put(b);
+
+	bcache_abort_fd(cache, fd);
+	T_ASSERT(bcache_flush(cache));
+
+	// Check the block is re-read
+	_expect_read(me, fd, 0);
+	_expect(me, E_WAIT);
+	T_ASSERT(bcache_get(cache, fd, 0, 0, &b));
+	bcache_put(b);
+}
+
+static void test_abort_only_specific_fd(void *context)
+{
+	struct fixture *f = context;
+	struct mock_engine *me = f->me;
+	struct bcache *cache = f->cache;
+	struct block *b;
+	int fd1 = 17, fd2 = 18;
+
+	T_ASSERT(bcache_get(cache, fd1, 0, GF_ZERO, &b));
+	bcache_put(b);
+
+	T_ASSERT(bcache_get(cache, fd1, 1, GF_ZERO, &b));
+	bcache_put(b);
+
+	T_ASSERT(bcache_get(cache, fd2, 0, GF_ZERO, &b));
+	bcache_put(b);
+
+	T_ASSERT(bcache_get(cache, fd2, 1, GF_ZERO, &b));
+	bcache_put(b);
+
+	bcache_abort_fd(cache, fd2);
+
+	// writes for fd1 should still be issued
+	_expect_write(me, fd1, 0);
+	_expect_write(me, fd1, 1);
+
+	_expect(me, E_WAIT);
+	_expect(me, E_WAIT);
+
+	T_ASSERT(bcache_flush(cache));
+}
+
+//----------------------------------------------------------------
 // Chasing a bug reported by dct
 
 static void _cycle(struct fixture *f, unsigned nr_cache_blocks)
@@ -898,6 +980,12 @@ static struct test_suite *_small_tests(v
 	T("invalidate-read-error", "invalidate a block that errored", test_invalidate_after_read_error);
 	T("invalidate-write-error", "invalidate a block that errored", test_invalidate_after_write_error);
 	T("invalidate-fails-in-held", "invalidating a held block fails", test_invalidate_held_block);
+
+	T("abort-with-no-blocks", "you can call abort, even if there are no blocks in the cache", test_abort_no_blocks);
+	T("abort-single-block", "single block get silently discarded", test_abort_single_block);
+	T("abort-forces-read", "if a block has been discarded then another read is necc.", test_abort_forces_reread);
+	T("abort-specific-fd", "abort doesn't effect other fds", test_abort_only_specific_fd);
+
 	T("concurrent-reads-after-invalidate", "prefetch should still issue concurrent reads after invalidate",
           test_concurrent_reads_after_invalidate);
 
@@ -919,7 +1007,7 @@ static struct test_suite *_large_tests(v
 
 void bcache_tests(struct dm_list *all_tests)
 {
-        dm_list_add(all_tests, &_tiny_tests()->list);
+	dm_list_add(all_tests, &_tiny_tests()->list);
 	dm_list_add(all_tests, &_small_tests()->list);
 	dm_list_add(all_tests, &_large_tests()->list);
 }
openSUSE Build Service is sponsored by