File btrfs-progs-mkfs-run-device-preparation-in-parallel.patch of Package btrfsprogs.31993

commit bb2eed3aa5e8808bd4cfad4b6678d52a32c73ff3
Author: Li Zhang <zhanglikernel@gmail.com>
Date:   Sun Sep 4 18:47:20 2022 +0800

    btrfs-progs: mkfs: run device preparation in parallel
    
    When devices are formatted as btrfs, btrfs_prepare_device is called
    sequentially for each device, which takes too much time.
    
    Run btrfs_prepare_device for each device in its own thread and wait for
    all threads to complete before creating the filesystem on the first
    device and adding the remaining devices to it.
    
    During the preparation it's either trim/discard or zone reset.
    
    This was tested with TCMU emulation with two zoned devices.  Each device
    is 20000G (about 19.53 TiB), the zone size is 4MB.  Use the following
    parameters for targetcli:
    
      create name=zbc0 size=20000G cfgstring=model-HM/zsize-4/conv-100@~/zbc0.raw
    
    The running time of btrfs_prepare_device was measured with difftime.
    With the patch applied, the time is measured from thread creation to
    completion of all threads:
    
      $ lsscsi -p
      [10:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdb   -          none
      [11:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdc   -          none
    
      $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
      ....
      time for prepare devices:4.000000.
      ....
    
      $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
      ...
      time for prepare devices:2.000000.
      ...
    
    Issue: #496
    Reviewed-by: Qu Wenruo <wqu@suse.com>
    Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
    Signed-off-by: David Sterba <dsterba@suse.com>

Index: btrfs-progs-v5.14/mkfs/main.c
===================================================================
--- btrfs-progs-v5.14.orig/mkfs/main.c
+++ btrfs-progs-v5.14/mkfs/main.c
@@ -28,6 +28,7 @@
 #include <limits.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <pthread.h>
 #include <uuid/uuid.h>
 #include <ctype.h>
 #include <blkid/blkid.h>
@@ -61,6 +62,27 @@ struct mkfs_allocation {
 	u64 system;
 };
 
+/*
+ * Device preparation options.  They live at file scope so that the
+ * prepare_one_device() thread callback can read them.
+ */
+static bool opt_zero_end = true;
+static bool opt_discard = true;
+static bool opt_zoned = false;
+static int opt_oflags = O_RDWR;
+
+/* Argument and result of one device preparation thread */
+struct prepare_device_progress {
+	/* Path of the device to prepare, points into argv */
+	char *file;
+	/* Passed by pointer to btrfs_prepare_device(), starts as block_count */
+	u64 dev_block_count;
+	/* Requested size handed to btrfs_prepare_device() */
+	u64 block_count;
+	/* 0 on success, -errno if open() failed, else btrfs_prepare_device() result */
+	int ret;
+};
+
 static int create_metadata_block_groups(struct btrfs_root *root, int mixed,
 				struct mkfs_allocation *allocation)
 {
@@ -865,6 +878,40 @@ fail:
 	return ret;
 }
 
+/*
+ * Thread callback for device preparation.
+ *
+ * @ctx: the struct prepare_device_progress of the device to prepare
+ *
+ * Open the device with opt_oflags and call btrfs_prepare_device() on it with
+ * the flags selected by bconf.verbose and the opt_* options.  The result is
+ * stored in ->ret: -errno if the open failed, otherwise the return value of
+ * btrfs_prepare_device().  Always returns NULL.
+ */
+static void *prepare_one_device(void *ctx)
+{
+	struct prepare_device_progress *prepare_ctx = ctx;
+	int fd;
+
+	fd = open(prepare_ctx->file, opt_oflags);
+	if (fd < 0) {
+		/* Save errno first, printing the message may overwrite it */
+		prepare_ctx->ret = -errno;
+		error("unable to open %s: %m", prepare_ctx->file);
+		return NULL;
+	}
+	prepare_ctx->ret = btrfs_prepare_device(fd, prepare_ctx->file,
+				&prepare_ctx->dev_block_count,
+				prepare_ctx->block_count,
+				(bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
+				(opt_zero_end ? PREP_DEVICE_ZERO_END : 0) |
+				(opt_discard ? PREP_DEVICE_DISCARD : 0) |
+				(opt_zoned ? PREP_DEVICE_ZONED : 0));
+	close(fd);
+
+	return NULL;
+}
+
 int BOX_MAIN(mkfs)(int argc, char **argv)
 {
 	char *file;
@@ -880,7 +917,6 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 	u32 nodesize = 0;
 	u32 sectorsize = 0;
 	u32 stripesize = 4096;
-	int zero_end = 1;
 	int fd = -1;
 	int ret = 0;
 	int close_ret;
@@ -889,9 +925,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 	int nodesize_forced = 0;
 	int data_profile_opt = 0;
 	int metadata_profile_opt = 0;
-	int discard = 1;
 	int ssd = 0;
-	int zoned = 0;
 	int force_overwrite = 0;
 	char *source_dir = NULL;
 	bool source_dir_set = false;
@@ -901,6 +935,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 	u64 shrink_size;
 	int dev_cnt = 0;
 	int saved_optind;
+	pthread_t *t_prepare = NULL;
+	struct prepare_device_progress *prepare_ctx = NULL;
 	char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
 	u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
 	u64 runtime_features = 0;
@@ -1011,7 +1047,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 				break;
 			case 'b':
 				block_count = parse_size_from_string(optarg);
-				zero_end = 0;
+				opt_zero_end = false;
 				break;
 			case 'V':
 				printf("mkfs.btrfs, part of %s\n",
@@ -1026,7 +1062,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 					BTRFS_UUID_UNPARSED_SIZE - 1);
 				break;
 			case 'K':
-				discard = 0;
+				opt_discard = false;
 				break;
 			case 'q':
 				verbose = 0;
@@ -1062,7 +1098,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 	if (dev_cnt == 0)
 		print_usage(1);
 
-	zoned = (features & BTRFS_FEATURE_INCOMPAT_ZONED);
+	opt_zoned = (features & BTRFS_FEATURE_INCOMPAT_ZONED);
 
 	if (source_dir_set && dev_cnt > 1) {
 		error("the option -r is limited to a single device");
@@ -1104,7 +1140,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 
 	file = argv[optind++];
 	ssd = is_ssd(file);
-	if (zoned) {
+	if (opt_zoned) {
 		if (!zone_size(file)) {
 			error("zoned: %s: zone size undefined", file);
 			exit(1);
@@ -1114,7 +1150,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 			printf(
 	"Zoned: %s: host-managed device detected, setting zoned feature\n",
 			       file);
-		zoned = 1;
+		opt_zoned = true;
 		features |= BTRFS_FEATURE_INCOMPAT_ZONED;
 	}
 
@@ -1181,7 +1217,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 		features |= BTRFS_FEATURE_INCOMPAT_RAID1C34;
 	}
 
-	if (zoned) {
+	if (opt_zoned) {
 		if (source_dir_set) {
 			error("the option -r and zoned mode are incompatible");
 			exit(1);
@@ -1271,7 +1307,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 	 * 1 zone for a metadata block group
 	 * 1 zone for a data block group
 	 */
-	if (zoned && block_count && block_count < 5 * zone_size(file)) {
+	if (opt_zoned && block_count && block_count < 5 * zone_size(file)) {
 		error("size %llu is too small to make a usable filesystem",
 			block_count);
 		error("minimum size for a zoned btrfs filesystem is %llu",
@@ -1301,31 +1337,68 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 	if (ret)
 		goto error;
 
-	if (zoned && ((metadata_profile | data_profile) &
+	if (opt_zoned && ((metadata_profile | data_profile) &
 		      BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
 		error("zoned mode does not yet support RAID/DUP profiles, please specify '-d single -m single' manually");
 		goto error;
 	}
 
-	dev_cnt--;
+	t_prepare = calloc(dev_cnt, sizeof(*t_prepare));
+	prepare_ctx = calloc(dev_cnt, sizeof(*prepare_ctx));
 
-	/*
-	 * Open without O_EXCL so that the problem should not occur by the
-	 * following operation in kernel:
-	 * (btrfs_register_one_device() fails if O_EXCL is on)
-	 */
-	fd = open(file, O_RDWR);
+	if (!t_prepare || !prepare_ctx) {
+		error("unable to alloc thread for preparing devices");
+		goto error;
+	}
+	/*
+	 * All devices are opened with the same flags, O_DIRECT is added as
+	 * soon as one of them is a host-managed zoned device.
+	 */
+	opt_oflags = O_RDWR;
+	for (i = 0; i < dev_cnt; i++) {
+		if (opt_zoned &&
+				zoned_model(argv[optind + i - 1]) == ZONED_HOST_MANAGED) {
+			opt_oflags |= O_DIRECT;
+			break;
+		}
+	}
+
+	/* Start threads, argv[optind - 1] is the first device */
+	for (i = 0; i < dev_cnt; i++) {
+		prepare_ctx[i].file = argv[optind + i - 1];
+		prepare_ctx[i].block_count = block_count;
+		prepare_ctx[i].dev_block_count = block_count;
+		ret = pthread_create(&t_prepare[i], NULL, prepare_one_device,
+				&prepare_ctx[i]);
+		if (ret) {
+			/* pthread_create() returns a positive error number */
+			errno = ret;
+			error("failed to create thread for prepare device %s: %m",
+					prepare_ctx[i].file);
+			/* Started threads use prepare_ctx, wait before it is freed */
+			while (i-- > 0)
+				pthread_join(t_prepare[i], NULL);
+			goto error;
+		}
+	}
+
+	/* Wait for threads */
+	for (i = 0; i < dev_cnt; i++)
+		pthread_join(t_prepare[i], NULL);
+	ret = prepare_ctx[0].ret;
+
+	if (ret) {
+		error("unable to prepare device: %s", prepare_ctx[0].file);
+		goto error;
+	}
+
+	dev_cnt--;
+	fd = open(file, opt_oflags);
 	if (fd < 0) {
 		error("unable to open %s: %m", file);
 		goto error;
 	}
-	ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
-			(zero_end ? PREP_DEVICE_ZERO_END : 0) |
-			(discard ? PREP_DEVICE_DISCARD : 0) |
-			(verbose ? PREP_DEVICE_VERBOSE : 0) |
-			(zoned ? PREP_DEVICE_ZONED : 0));
-	if (ret)
-		goto error;
+	dev_block_count = prepare_ctx[0].dev_block_count;
 	if (block_count && block_count > dev_block_count) {
 		error("%s is smaller than requested size, expected %llu, found %llu",
 		      file, (unsigned long long)block_count,
@@ -1334,7 +1399,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 	}
 
 	/* To create the first block group and chunk 0 in make_btrfs */
-	system_group_size = zoned ?  zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
+	system_group_size = (opt_zoned ? zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE);
 	if (dev_block_count < system_group_size) {
 		error("device is too small to make filesystem, must be at least %llu",
 				(unsigned long long)system_group_size);
@@ -1414,14 +1479,10 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 		goto raid_groups;
 
 	while (dev_cnt-- > 0) {
+		int dev_index = argc - saved_optind - dev_cnt - 1;
 		file = argv[optind++];
 
-		/*
-		 * open without O_EXCL so that the problem should not
-		 * occur by the following processing.
-		 * (btrfs_register_one_device() fails if O_EXCL is on)
-		 */
-		fd = open(file, O_RDWR);
+		fd = open(file, opt_oflags);
 		if (fd < 0) {
 			error("unable to open %s: %m", file);
 			goto error;
@@ -1434,13 +1495,12 @@ int BOX_MAIN(mkfs)(int argc, char **argv
 			close(fd);
 			continue;
 		}
-		ret = btrfs_prepare_device(fd, file, &dev_block_count,
-				block_count,
-				(verbose ? PREP_DEVICE_VERBOSE : 0) |
-				(zero_end ? PREP_DEVICE_ZERO_END : 0) |
-				(discard ? PREP_DEVICE_DISCARD : 0) |
-				(zoned ? PREP_DEVICE_ZONED : 0));
-		if (ret) {
+		dev_block_count = prepare_ctx[dev_index].dev_block_count;
+
+		if (prepare_ctx[dev_index].ret) {
+			errno = -prepare_ctx[dev_index].ret;
+			error("unable to prepare device %s: %m",
+					prepare_ctx[dev_index].file);
 			goto error;
 		}
 
@@ -1543,8 +1603,8 @@ raid_groups:
 			btrfs_group_profile_str(metadata_profile),
 			pretty_size(allocation.system));
 		printf("SSD detected:       %s\n", ssd ? "yes" : "no");
-		printf("Zoned device:       %s\n", zoned ? "yes" : "no");
-		if (zoned)
+		printf("Zoned device:       %s\n", opt_zoned ? "yes" : "no");
+		if (opt_zoned)
 			printf("  Zone size:        %s\n",
 			       pretty_size(fs_info->zone_size));
 		btrfs_parse_fs_features_to_string(features_buf, features);
@@ -1591,6 +1651,8 @@ out:
 	}
 
 	btrfs_close_all_devices();
+	free(t_prepare);
+	free(prepare_ctx);
 	free(label);
 
 	return !!ret;
@@ -1598,6 +1660,8 @@ error:
 	if (fd > 0)
 		close(fd);
 
+	free(t_prepare);
+	free(prepare_ctx);
 	free(label);
 	exit(1);
 success:
openSUSE Build Service is sponsored by