File bug-1149408_Fix-rounding-writes-up-to-sector-size.patch of Package lvm2.17787
From 7f347698e3d09b15b4f9aed9c61239fda7b9e8c8 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Fri, 26 Jul 2019 14:21:08 -0500
Subject: [PATCH] Fix rounding writes up to sector size
Do this at two levels, although one would be enough to
fix the problem seen recently:
- Ignore any reported sector size other than 512 of 4096.
  If either sector size (physical or logical) is reported
  as 512, then use 512.  If neither are reported as 512,
  and one or the other is reported as 4096, then use 4096.
  If neither is reported as either 512 or 4096, then use 512.
- When rounding up a limited write in bcache to be a multiple
  of the sector size, check that the resulting write size is
  not larger than the bcache block itself.  (This shouldn't
  happen if the sector size is 512 or 4096.)
---
 lib/device/bcache.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 lib/device/dev-io.c | 52 +++++++++++++++++++++++++++++++
 lib/device/device.h |  8 +++--
 lib/label/label.c   | 30 ++++++++++++++----
 4 files changed, 169 insertions(+), 10 deletions(-)
diff --git a/lib/device/bcache.c b/lib/device/bcache.c
index 7b0935352..04fbf3521 100644
--- a/lib/device/bcache.c
+++ b/lib/device/bcache.c
@@ -169,6 +169,7 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
 	sector_t offset;
 	sector_t nbytes;
 	sector_t limit_nbytes;
+	sector_t orig_nbytes;
 	sector_t extra_nbytes = 0;
 
 	if (((uintptr_t) data) & e->page_mask) {
@@ -191,11 +192,41 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
 			return false;
 		}
 
+		/*
+		 * If the bcache block offset+len goes beyond where lvm is
+		 * intending to write, then reduce the len being written
+		 * (which is the bcache block size) so we don't write past
+		 * the limit set by lvm.  If after applying the limit, the
+		 * resulting size is not a multiple of the sector size (512
+		 * or 4096) then extend the reduced size to be a multiple of
+		 * the sector size (we don't want to write partial sectors.)
+		 */
 		if (offset + nbytes > _last_byte_offset) {
 			limit_nbytes = _last_byte_offset - offset;
-			if (limit_nbytes % _last_byte_sector_size)
+
+			if (limit_nbytes % _last_byte_sector_size) {
 				extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
 
+				/*
+				 * adding extra_nbytes to the reduced nbytes (limit_nbytes)
+				 * should make the final write size a multiple of the
+				 * sector size.  This should never result in a final size
+				 * larger than the bcache block size (as long as the bcache
+				 * block size is a multiple of the sector size).
+				 */
+				if (limit_nbytes + extra_nbytes > nbytes) {
+					log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
+						 (unsigned long long)offset,
+						 (unsigned long long)nbytes,
+						 (unsigned long long)limit_nbytes,
+						 (unsigned long long)extra_nbytes,
+						 (unsigned long long)_last_byte_sector_size);
+					extra_nbytes = 0;
+				}
+			}
+
+			orig_nbytes = nbytes;
+
 			if (extra_nbytes) {
 				log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
 					  (unsigned long long)offset,
@@ -210,6 +241,22 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd,
 					  (unsigned long long)limit_nbytes);
 				nbytes = limit_nbytes;
 			}
+
+			/*
+			 * This shouldn't happen, the reduced+extended
+			 * nbytes value should never be larger than the
+			 * bcache block size.
+			 */
+			if (nbytes > orig_nbytes) {
+				log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
+					  (unsigned long long)offset,
+					  (unsigned long long)orig_nbytes,
+					  (unsigned long long)nbytes,
+					  (unsigned long long)limit_nbytes,
+					  (unsigned long long)extra_nbytes,
+					  (unsigned long long)_last_byte_sector_size);
+				return false;
+			}
 		}
 	}
 
@@ -403,6 +450,7 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
 		uint64_t nbytes = len;
 		sector_t limit_nbytes = 0;
 		sector_t extra_nbytes = 0;
+		sector_t orig_nbytes = 0;
 
 		if (offset > _last_byte_offset) {
 			log_error("Limit write at %llu len %llu beyond last byte %llu",
@@ -415,9 +463,30 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
 
 		if (offset + nbytes > _last_byte_offset) {
 			limit_nbytes = _last_byte_offset - offset;
-			if (limit_nbytes % _last_byte_sector_size)
+
+			if (limit_nbytes % _last_byte_sector_size) {
 				extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);
 
+				/*
+				 * adding extra_nbytes to the reduced nbytes (limit_nbytes)
+				 * should make the final write size a multiple of the
+				 * sector size.  This should never result in a final size
+				 * larger than the bcache block size (as long as the bcache
+				 * block size is a multiple of the sector size).
+				 */
+				if (limit_nbytes + extra_nbytes > nbytes) {
+					log_warn("Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu",
+						 (unsigned long long)offset,
+						 (unsigned long long)nbytes,
+						 (unsigned long long)limit_nbytes,
+						 (unsigned long long)extra_nbytes,
+						 (unsigned long long)_last_byte_sector_size);
+					extra_nbytes = 0;
+				}
+			}
+
+			orig_nbytes = nbytes;
+
 			if (extra_nbytes) {
 				log_debug("Limit write at %llu len %llu to len %llu rounded to %llu",
 					  (unsigned long long)offset,
@@ -432,6 +501,22 @@ static bool _sync_issue(struct io_engine *ioe, enum dir d, int fd,
 					  (unsigned long long)limit_nbytes);
 				nbytes = limit_nbytes;
 			}
+
+			/*
+			 * This shouldn't happen, the reduced+extended
+			 * nbytes value should never be larger than the
+			 * bcache block size.
+			 */
+			if (nbytes > orig_nbytes) {
+				log_error("Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu",
+					  (unsigned long long)offset,
+					  (unsigned long long)orig_nbytes,
+					  (unsigned long long)nbytes,
+					  (unsigned long long)limit_nbytes,
+					  (unsigned long long)extra_nbytes,
+					  (unsigned long long)_last_byte_sector_size);
+				return false;
+			}
 		}
 
 		where = offset;
diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c
index 3fe264755..5fa0b7a9e 100644
--- a/lib/device/dev-io.c
+++ b/lib/device/dev-io.c
@@ -250,6 +250,58 @@ static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64
 	return 1;
 }
 
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
+				unsigned int *logical_block_size)
+{
+	int fd = dev->bcache_fd;
+	int do_close = 0;
+	unsigned int pbs = 0;
+	unsigned int lbs = 0;
+
+	if (dev->physical_block_size || dev->logical_block_size) {
+		*physical_block_size = dev->physical_block_size;
+		*logical_block_size = dev->logical_block_size;
+		return 1;
+	}
+
+	if (fd <= 0) {
+		if (!dev_open_readonly(dev))
+			return 0;
+		fd = dev_fd(dev);
+		do_close = 1;
+	}
+
+	/*
+	 * BLKPBSZGET from kernel comment for blk_queue_physical_block_size:
+	 * "the lowest possible sector size that the hardware can operate on
+	 * without reverting to read-modify-write operations"
+	 */
+	if (ioctl(fd, BLKPBSZGET, &pbs)) {
+		stack;
+		pbs = 0;
+	}
+
+	/*
+	 * BLKSSZGET from kernel comment for blk_queue_logical_block_size:
+	 * "the lowest possible block size that the storage device can address."
+	 */
+	if (ioctl(fd, BLKSSZGET, &lbs)) {
+		stack;
+		lbs = 0;
+	}
+
+	dev->physical_block_size = pbs;
+	dev->logical_block_size = lbs;
+
+	*physical_block_size = pbs;
+	*logical_block_size = lbs;
+
+	if (do_close && !dev_close_immediate(dev))
+		stack;
+
+	return 1;
+}
+
 /*-----------------------------------------------------------------
  * Public functions
  *---------------------------------------------------------------*/
diff --git a/lib/device/device.h b/lib/device/device.h
index 30e1e79b3..bb65f841d 100644
--- a/lib/device/device.h
+++ b/lib/device/device.h
@@ -67,8 +67,10 @@ struct device {
 	/* private */
 	int fd;
 	int open_count;
-	int phys_block_size;
-	int block_size;
+	int phys_block_size;     /* From either BLKPBSZGET or BLKSSZGET, don't use */
+	int block_size;	         /* From BLKBSZGET, returns bdev->bd_block_size, likely set by fs, probably don't use */
+	int physical_block_size; /* From BLKPBSZGET: lowest possible sector size that the hardware can operate on without reverting to read-modify-write operations */
+	int logical_block_size;  /* From BLKSSZGET: lowest possible block size that the storage device can address */
 	int read_ahead;
 	int bcache_fd;
 	uint32_t flags;
@@ -132,6 +134,8 @@ void dev_size_seqno_inc(void);
  * All io should use these routines.
  */
 int dev_get_block_size(struct device *dev, unsigned int *phys_block_size, unsigned int *block_size);
+int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
+                               unsigned int *logical_block_size);
 int dev_get_size(struct device *dev, uint64_t *size);
 int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead);
 int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes);
diff --git a/lib/label/label.c b/lib/label/label.c
index fb7ad1d56..5d8a0f51b 100644
--- a/lib/label/label.c
+++ b/lib/label/label.c
@@ -1495,16 +1495,34 @@ bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
 
 void dev_set_last_byte(struct device *dev, uint64_t offset)
 {
-	unsigned int phys_block_size = 0;
-	unsigned int block_size = 0;
+	unsigned int physical_block_size = 0;
+	unsigned int logical_block_size = 0;
+	unsigned int bs;
 
-	if (!dev_get_block_size(dev, &phys_block_size, &block_size)) {
+	if (!dev_get_direct_block_sizes(dev, &physical_block_size, &logical_block_size)) {
 		stack;
-		/* FIXME  ASSERT or regular error testing is missing */
-		return;
+		return; /* FIXME: error path ? */
+	}
+
+	if ((physical_block_size == 512) && (logical_block_size == 512))
+		bs = 512;
+	else if ((physical_block_size == 4096) && (logical_block_size == 4096))
+		bs = 4096;
+	else if ((physical_block_size == 512) || (logical_block_size == 512)) {
+		log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
+			  physical_block_size, logical_block_size);
+		bs = 512;
+	} else if ((physical_block_size == 4096) || (logical_block_size == 4096)) {
+		log_debug("Set last byte mixed block sizes physical %u logical %u using 4096",
+			  physical_block_size, logical_block_size);
+		bs = 4096;
+	} else {
+		log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
+			  physical_block_size, logical_block_size);
+		bs = 512;
 	}
 
-	bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, phys_block_size);
+	bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, bs);
 }
 
 void dev_unset_last_byte(struct device *dev)
-- 
2.12.3