File bug-1150021_02-suse-special-bcache-bug-fix.patch of Package lvm2.11953
The bcache issues (see: 2938b4dcca0 & 6b0d969b2a) had been fixed in the
end of Oct (2019). But the stable-2.02 still unfix. I made a patch from
master branch to stable-2.02, and the patch had been passed in unit
test.
For the patch code, there is one ugly function bcache_abort_fd(), the
stable-2.02 doesn't have easy way to search block. So I wrote 4
iterators loop. It will cost O(n) on every execution.
How the bug happen:
When bcache writes data error, the errored fd and its data is saved in
cache->errored, then this fd is closed. Later lvm will reuse this
closed fd to new opened devs, but the fd related data still in
cache->errored and flags with BF_DIRTY. It makes the data may mistakenly
write to another disk.
Related master branch commits:
2938b4dcca0a1df661758abfab7f402ea7aab018
6b0d969b2a85ba69046afa26af4d7bcddddbccd5
5fdebf9bbf68a53b9d2153dbd8bf27a36e0ef3cd
25e7bf021a4e7c5ad5f925b86605bf025ff1a949
In stable-2.02 branch, bcache uses hash-table to manage blocks, but
master branch had been switched to radix tree. This commit changed
related (radix tree) functions to hash-table style.
Signed-off-by: Zhao Heming <heming.zhao@suse.com>
---
lib/device/bcache.c | 61 +++++++++++++++++++++++++++--
lib/device/bcache.h | 7 ++++
lib/label/label.c | 32 +++++++++------
test/unit/bcache_t.c | 92 +++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 174 insertions(+), 18 deletions(-)
diff -Nupr a/lib/device/bcache.c b/lib/device/bcache.c
--- a/lib/device/bcache.c 2019-12-23 14:37:52.179166465 +0800
+++ b/lib/device/bcache.c 2019-12-23 14:40:11.803782520 +0800
@@ -1165,7 +1165,7 @@ static bool _invalidate_block(struct bca
_wait_specific(b);
if (b->error)
- return false;
+ return false;
}
_recycle_block(cache, b);
@@ -1186,18 +1186,71 @@ bool bcache_invalidate_fd(struct bcache
bool r = true;
// Start writing back any dirty blocks on this fd.
- dm_list_iterate_items_safe (b, tmp, &cache->dirty)
+ dm_list_iterate_items_safe(b, tmp, &cache->dirty)
if (b->fd == fd)
_issue_write(b);
_wait_all(cache);
// Everything should be in the clean list now.
- dm_list_iterate_items_safe (b, tmp, &cache->clean)
+ dm_list_iterate_items_safe(b, tmp, &cache->clean)
if (b->fd == fd)
r = _invalidate_block(cache, b) && r;
- return r;
+ if (r)
+ _hash_remove(b);
+
+ return r;
+}
+
+//----------------------------------------------------------------
+
+static bool _abort_v(struct block *b)
+{
+ if (b->ref_count) {
+ log_fatal("bcache_abort: block (%d, %llu) still held",
+ b->fd, (unsigned long long) b->index);
+ return true;
+ }
+
+ _unlink_block(b);
+ dm_list_add(&b->cache->free, &b->list);
+
+ // We can't remove the block from the radix tree yet because
+ // we're in the middle of an iteration.
+ return true;
+}
+
+/* This function searches all impossible list to find out fd. */
+void bcache_abort_fd(struct bcache *cache, int fd)
+{
+ struct block *b, *tmp;
+
+ dm_list_iterate_items_safe(b, tmp, &cache->errored)
+ if (b->fd == fd) {
+ _abort_v(b);
+ _hash_remove(b);
+ }
+
+ dm_list_iterate_items_safe(b, tmp, &cache->dirty)
+ if (b->fd == fd) {
+ _abort_v(b);
+ _hash_remove(b);
+ }
+
+ dm_list_iterate_items_safe(b, tmp, &cache->io_pending)
+ if (b->fd == fd) {
+ _abort_v(b);
+ _hash_remove(b);
+ }
+
+ dm_list_iterate_items_safe(b, tmp, &cache->clean)
+ if (b->fd == fd) {
+ _abort_v(b);
+ _hash_remove(b);
+ }
+
+ return;
}
//----------------------------------------------------------------
diff -Nupr a/lib/device/bcache.h b/lib/device/bcache.h
--- a/lib/device/bcache.h 2019-12-23 14:37:52.179166465 +0800
+++ b/lib/device/bcache.h 2019-12-23 14:40:15.123797169 +0800
@@ -145,6 +145,13 @@ bool bcache_invalidate(struct bcache *ca
*/
bool bcache_invalidate_fd(struct bcache *cache, int fd);
+/*
+ * Call this function if flush, or invalidate fail and you do not
+ * wish to retry the writes. This will throw away any dirty data
+ * not written. If any blocks for fd are held, then it will call
+ * abort().
+ */
+void bcache_abort_fd(struct bcache *cache, int fd);
//----------------------------------------------------------------
// The next four functions are utilities written in terms of the above api.
diff -Nupr a/lib/label/label.c b/lib/label/label.c
--- a/lib/label/label.c 2019-12-23 14:38:05.131223614 +0800
+++ b/lib/label/label.c 2019-12-23 14:40:27.739852834 +0800
@@ -592,6 +592,14 @@ static void _drop_bad_aliases(struct dev
}
}
+// Like bcache_invalidate, only it throws any dirty data away if the
+// write fails.
+static void _invalidate_fd(struct bcache *cache, int fd)
+{
+ if (!bcache_invalidate_fd(cache, fd))
+ bcache_abort_fd(cache, fd);
+}
+
/*
* Read or reread label/metadata from selected devs.
*
@@ -702,7 +710,7 @@ static int _scan_list(struct cmd_context
* drop it from bcache.
*/
if (scan_failed || !is_lvm_device) {
- bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
+ _invalidate_fd(scan_bcache, devl->dev->bcache_fd);
_scan_dev_close(devl->dev);
}
@@ -859,7 +867,7 @@ int label_scan(struct cmd_context *cmd)
* so this will usually not be true.
*/
if (_in_bcache(dev)) {
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+ _invalidate_fd(scan_bcache, dev->bcache_fd);
_scan_dev_close(dev);
}
@@ -943,7 +951,7 @@ int label_scan_pvscan_all(struct cmd_con
* so this will usually not be true.
*/
if (_in_bcache(dev)) {
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+ _invalidate_fd(scan_bcache, dev->bcache_fd);
_scan_dev_close(dev);
}
@@ -1006,7 +1014,7 @@ int label_scan_devs(struct cmd_context *
dm_list_iterate_items(devl, devs) {
if (_in_bcache(devl->dev)) {
- bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
+ _invalidate_fd(scan_bcache, devl->dev->bcache_fd);
_scan_dev_close(devl->dev);
}
}
@@ -1025,7 +1033,7 @@ int label_scan_devs_rw(struct cmd_contex
dm_list_iterate_items(devl, devs) {
if (_in_bcache(devl->dev)) {
- bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
+ _invalidate_fd(scan_bcache, devl->dev->bcache_fd);
_scan_dev_close(devl->dev);
}
@@ -1047,7 +1055,7 @@ int label_scan_devs_excl(struct dm_list
dm_list_iterate_items(devl, devs) {
if (_in_bcache(devl->dev)) {
- bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
+ _invalidate_fd(scan_bcache, devl->dev->bcache_fd);
_scan_dev_close(devl->dev);
}
/*
@@ -1067,7 +1075,7 @@ int label_scan_devs_excl(struct dm_list
void label_scan_invalidate(struct device *dev)
{
if (_in_bcache(dev)) {
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+ _invalidate_fd(scan_bcache, dev->bcache_fd);
_scan_dev_close(dev);
}
}
@@ -1147,7 +1155,7 @@ int label_read(struct device *dev)
dm_list_add(&one_dev, &devl->list);
if (_in_bcache(dev)) {
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+ _invalidate_fd(scan_bcache, dev->bcache_fd);
_scan_dev_close(dev);
}
@@ -1253,7 +1261,7 @@ int label_scan_open_excl(struct device *
if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_EXCL)) {
/* FIXME: avoid tossing out bcache blocks just to replace fd. */
log_debug("Close and reopen excl %s", dev_name(dev));
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+ _invalidate_fd(scan_bcache, dev->bcache_fd);
_scan_dev_close(dev);
}
dev->flags |= DEV_BCACHE_EXCL;
@@ -1302,7 +1310,7 @@ bool dev_write_bytes(struct device *dev,
if (!(dev->flags & DEV_BCACHE_WRITE)) {
/* FIXME: avoid tossing out bcache blocks just to replace fd. */
log_debug("Close and reopen to write %s", dev_name(dev));
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+ _invalidate_fd(scan_bcache, dev->bcache_fd);
_scan_dev_close(dev);
dev->flags |= DEV_BCACHE_WRITE;
@@ -1350,7 +1358,7 @@ bool dev_write_zeros(struct device *dev,
if (!(dev->flags & DEV_BCACHE_WRITE)) {
/* FIXME: avoid tossing out bcache blocks just to replace fd. */
log_debug("Close and reopen to write %s", dev_name(dev));
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+ _invalidate_fd(scan_bcache, dev->bcache_fd);
_scan_dev_close(dev);
dev->flags |= DEV_BCACHE_WRITE;
@@ -1401,7 +1409,7 @@ bool dev_set_bytes(struct device *dev, u
if (!(dev->flags & DEV_BCACHE_WRITE)) {
/* FIXME: avoid tossing out bcache blocks just to replace fd. */
log_debug("Close and reopen to write %s", dev_name(dev));
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
+ _invalidate_fd(scan_bcache, dev->bcache_fd);
_scan_dev_close(dev);
dev->flags |= DEV_BCACHE_WRITE;
diff -Nupr a/test/unit/bcache_t.c b/test/unit/bcache_t.c
--- a/test/unit/bcache_t.c 2019-12-23 14:38:19.691287854 +0800
+++ b/test/unit/bcache_t.c 2019-12-23 14:40:45.459931020 +0800
@@ -794,7 +794,6 @@ static void test_invalidate_after_write_
static void test_invalidate_held_block(void *context)
{
-
struct fixture *f = context;
struct mock_engine *me = f->me;
struct bcache *cache = f->cache;
@@ -811,6 +810,89 @@ static void test_invalidate_held_block(v
}
//----------------------------------------------------------------
+// abort tests
+static void test_abort_no_blocks(void *context)
+{
+ struct fixture *f = context;
+ struct bcache *cache = f->cache;
+ int fd = 17;
+
+ // We have no expectations
+ bcache_abort_fd(cache, fd);
+}
+
+static void test_abort_single_block(void *context)
+{
+ struct fixture *f = context;
+ struct bcache *cache = f->cache;
+ struct block *b;
+ int fd = 17;
+
+ T_ASSERT(bcache_get(cache, fd, 0, GF_ZERO, &b));
+ bcache_put(b);
+
+ bcache_abort_fd(cache, fd);
+
+ // no write should be issued
+ T_ASSERT(bcache_flush(cache));
+}
+
+static void test_abort_forces_reread(void *context)
+{
+ struct fixture *f = context;
+ struct mock_engine *me = f->me;
+ struct bcache *cache = f->cache;
+ struct block *b;
+ int fd = 17;
+
+ _expect_read(me, fd, 0);
+ _expect(me, E_WAIT);
+ T_ASSERT(bcache_get(cache, fd, 0, GF_DIRTY, &b));
+ bcache_put(b);
+
+ bcache_abort_fd(cache, fd);
+ T_ASSERT(bcache_flush(cache));
+
+ // Check the block is re-read
+ _expect_read(me, fd, 0);
+ _expect(me, E_WAIT);
+ T_ASSERT(bcache_get(cache, fd, 0, 0, &b));
+ bcache_put(b);
+}
+
+static void test_abort_only_specific_fd(void *context)
+{
+ struct fixture *f = context;
+ struct mock_engine *me = f->me;
+ struct bcache *cache = f->cache;
+ struct block *b;
+ int fd1 = 17, fd2 = 18;
+
+ T_ASSERT(bcache_get(cache, fd1, 0, GF_ZERO, &b));
+ bcache_put(b);
+
+ T_ASSERT(bcache_get(cache, fd1, 1, GF_ZERO, &b));
+ bcache_put(b);
+
+ T_ASSERT(bcache_get(cache, fd2, 0, GF_ZERO, &b));
+ bcache_put(b);
+
+ T_ASSERT(bcache_get(cache, fd2, 1, GF_ZERO, &b));
+ bcache_put(b);
+
+ bcache_abort_fd(cache, fd2);
+
+ // writes for fd1 should still be issued
+ _expect_write(me, fd1, 0);
+ _expect_write(me, fd1, 1);
+
+ _expect(me, E_WAIT);
+ _expect(me, E_WAIT);
+
+ T_ASSERT(bcache_flush(cache));
+}
+
+//----------------------------------------------------------------
// Chasing a bug reported by dct
static void _cycle(struct fixture *f, unsigned nr_cache_blocks)
@@ -898,6 +980,12 @@ static struct test_suite *_small_tests(v
T("invalidate-read-error", "invalidate a block that errored", test_invalidate_after_read_error);
T("invalidate-write-error", "invalidate a block that errored", test_invalidate_after_write_error);
T("invalidate-fails-in-held", "invalidating a held block fails", test_invalidate_held_block);
+
+ T("abort-with-no-blocks", "you can call abort, even if there are no blocks in the cache", test_abort_no_blocks);
+ T("abort-single-block", "single block get silently discarded", test_abort_single_block);
+ T("abort-forces-read", "if a block has been discarded then another read is necc.", test_abort_forces_reread);
+ T("abort-specific-fd", "abort doesn't effect other fds", test_abort_only_specific_fd);
+
T("concurrent-reads-after-invalidate", "prefetch should still issue concurrent reads after invalidate",
test_concurrent_reads_after_invalidate);
@@ -919,7 +1007,7 @@ static struct test_suite *_large_tests(v
void bcache_tests(struct dm_list *all_tests)
{
- dm_list_add(all_tests, &_tiny_tests()->list);
+ dm_list_add(all_tests, &_tiny_tests()->list);
dm_list_add(all_tests, &_small_tests()->list);
dm_list_add(all_tests, &_large_tests()->list);
}