Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Leap:42.2:Ports
mdadm
1003-failfast.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 1003-failfast.patch of Package mdadm
From 38f27538b2c807e5d6d933474c1404cf78731d22 Mon Sep 17 00:00:00 2001 From: NeilBrown <neilb@suse.de> Date: Fri, 25 Nov 2011 12:19:19 +1100 Subject: [PATCH] Add failfast support. Allow per-device failfast flag to be set when creating an array or adding devices to an array. Also print failfast status in --detail and --examine. Signed-off-by: NeilBrown <neilb@suse.de> --- Create.c | 2 ++ Detail.c | 1 + Incremental.c | 1 + Manage.c | 20 +++++++++++++++++++- ReadMe.c | 2 ++ md_p.h | 1 + mdadm.8.in | 28 +++++++++++++++++++++++++++- mdadm.c | 11 +++++++++++ mdadm.h | 5 +++++ super0.c | 11 +++++++---- super1.c | 13 +++++++++++++ 11 files changed, 89 insertions(+), 6 deletions(-) Index: mdadm-3.3.4/Create.c =================================================================== --- mdadm-3.3.4.orig/Create.c +++ mdadm-3.3.4/Create.c @@ -885,6 +885,8 @@ int Create(struct supertype *st, char *m if (dv->writemostly == 1) inf->disk.state |= (1<<MD_DISK_WRITEMOSTLY); + if (dv->failfast == 1) + inf->disk.state |= (1<<MD_DISK_FAILFAST); if (have_container) fd = -1; Index: mdadm-3.3.4/Detail.c =================================================================== --- mdadm-3.3.4.orig/Detail.c +++ mdadm-3.3.4/Detail.c @@ -656,6 +656,7 @@ This is pretty boring } if (disk.state & (1<<MD_DISK_REMOVED)) printf(" removed"); if (disk.state & (1<<MD_DISK_WRITEMOSTLY)) printf(" writemostly"); + if (disk.state & (1<<MD_DISK_FAILFAST)) printf(" failfast"); if (disk.state & (1<<MD_DISK_JOURNAL)) printf(" journal"); if ((disk.state & ((1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC) Index: mdadm-3.3.4/Incremental.c =================================================================== --- mdadm-3.3.4.orig/Incremental.c +++ mdadm-3.3.4/Incremental.c @@ -1038,6 +1038,7 @@ static int array_try_spare(char *devname devlist.next = NULL; devlist.used = 0; devlist.writemostly = 0; + devlist.failfast = 0; devlist.devname = chosen_devname; sprintf(chosen_devname, "%d:%d", major(stb.st_rdev), minor(stb.st_rdev)); Index: 
mdadm-3.3.4/Manage.c =================================================================== --- mdadm-3.3.4.orig/Manage.c +++ mdadm-3.3.4/Manage.c @@ -682,8 +682,13 @@ int attempt_re_add(int fd, int tfd, stru disc.state |= 1 << MD_DISK_WRITEMOSTLY; if (dv->writemostly == 2) disc.state &= ~(1 << MD_DISK_WRITEMOSTLY); + if (dv->failfast == 1) + disc.state |= 1 << MD_DISK_FAILFAST; + if (dv->failfast == 2) + disc.state &= ~(1 << MD_DISK_FAILFAST); remove_partitions(tfd); - if (update || dv->writemostly > 0) { + if (update || dv->writemostly > 0 + || dv->failfast > 0) { int rv = -1; tfd = dev_open(dv->devname, O_RDWR); if (tfd < 0) { @@ -699,6 +704,14 @@ int attempt_re_add(int fd, int tfd, stru rv = dev_st->ss->update_super( dev_st, NULL, "readwrite", devname, verbose, 0, NULL); + if (dv->failfast == 1) + rv = dev_st->ss->update_super( + dev_st, NULL, "failfast", + devname, verbose, 0, NULL); + if (dv->failfast == 2) + rv = dev_st->ss->update_super( + dev_st, NULL, "nofailfast", + devname, verbose, 0, NULL); if (update) rv = dev_st->ss->update_super( dev_st, NULL, update, @@ -957,6 +970,8 @@ int Manage_add(int fd, int tfd, struct m disc.state |= (1 << MD_DISK_JOURNAL) | (1 << MD_DISK_SYNC); if (dv->writemostly == 1) disc.state |= 1 << MD_DISK_WRITEMOSTLY; + if (dv->failfast == 1) + disc.state |= 1 << MD_DISK_FAILFAST; dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT); if (tst->ss->add_to_super(tst, &disc, dfd, dv->devname, INVALID_SECTORS)) @@ -1002,6 +1017,8 @@ int Manage_add(int fd, int tfd, struct m if (dv->writemostly == 1) disc.state |= (1 << MD_DISK_WRITEMOSTLY); + if (dv->failfast == 1) + disc.state |= (1 << MD_DISK_FAILFAST); if (tst->ss->external) { /* add a disk * to an external metadata container */ @@ -1770,6 +1787,7 @@ int move_spare(char *from_devname, char devlist.next = NULL; devlist.used = 0; devlist.writemostly = 0; + devlist.failfast = 0; devlist.devname = devname; sprintf(devname, "%d:%d", major(devid), minor(devid)); Index: mdadm-3.3.4/ReadMe.c 
=================================================================== --- mdadm-3.3.4.orig/ReadMe.c +++ mdadm-3.3.4/ReadMe.c @@ -136,6 +136,8 @@ struct option long_options[] = { {"bitmap-chunk", 1, 0, BitmapChunk}, {"write-behind", 2, 0, WriteBehind}, {"write-mostly",0, 0, WriteMostly}, + {"failfast", 0, 0, FailFast}, + {"nofailfast",0, 0, NoFailFast}, {"re-add", 0, 0, ReAdd}, {"homehost", 1, 0, HomeHost}, {"symlinks", 1, 0, Symlinks}, Index: mdadm-3.3.4/md_p.h =================================================================== --- mdadm-3.3.4.orig/md_p.h +++ mdadm-3.3.4/md_p.h @@ -89,6 +89,7 @@ * read requests will only be sent here in * dire need */ +#define MD_DISK_FAILFAST 10 /* Fewer retries, more failures */ #define MD_DISK_REPLACEMENT 17 #define MD_DISK_JOURNAL 18 /* disk is used as the write journal in RAID-5/6 */ Index: mdadm-3.3.4/mdadm.8.in =================================================================== --- mdadm-3.3.4.orig/mdadm.8.in +++ mdadm-3.3.4/mdadm.8.in @@ -747,7 +747,7 @@ subsequent devices listed in a .BR \-\-create , or .B \-\-add -command will be flagged as 'write-mostly'. This is valid for RAID1 +command will be flagged as 'write\-mostly'. This is valid for RAID1 only and means that the 'md' driver will avoid reading from these devices if at all possible. This can be useful if mirroring over a slow link. @@ -762,6 +762,21 @@ mode, and write-behind is only attempted .IR write-mostly . .TP +.BR \-\-failfast +subsequent devices listed in a +.BR \-\-create , +or +.B \-\-add +command will be flagged as 'failfast'. This is valid for RAID1 and +RAID10 only. IO requests to these devices will be encouraged to fail +quickly rather than cause long delays due to error handling. Also no +attempt is made to repair a read error on these devices. + +If an array becomes degraded so that the 'failfast' device is the only +usable device, the 'failfast' flag will then be ignored and extended +delays will be preferred to complete failure. 
+ +.TP .BR \-\-assume\-clean Tell .I mdadm @@ -1443,6 +1458,17 @@ number. The receiving node must acknowle with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case the device is found or <slot>:missing in case the device is not found. +.TP +.BR \-\-failfast +Subsequent devices that are added or re\-added will have +the 'failfast' flag set. This is only valid for RAID1 and RAID10 and +means that the 'md' driver will avoid long timeouts on error handling +where possible. +.TP +.BR \-\-nofailfast +Subsequent devices that are re\-added will be re\-added without +the 'failfast' flag set. + .P Each of these options requires that the first device listed is the array to be acted upon, and the remainder are component devices to be added, Index: mdadm-3.3.4/mdadm.c =================================================================== --- mdadm-3.3.4.orig/mdadm.c +++ mdadm-3.3.4/mdadm.c @@ -90,6 +90,7 @@ int main(int argc, char *argv[]) int spare_sharing = 1; struct supertype *ss = NULL; int writemostly = 0; + int failfast = 0; char *shortopt = short_options; int dosyslog = 0; int rebuild_map = 0; @@ -295,6 +296,7 @@ int main(int argc, char *argv[]) dv->devname = optarg; dv->disposition = devmode; dv->writemostly = writemostly; + dv->failfast = failfast; dv->used = 0; dv->next = NULL; *devlistend = dv; @@ -351,6 +353,7 @@ int main(int argc, char *argv[]) dv->devname = optarg; dv->disposition = devmode; dv->writemostly = writemostly; + dv->failfast = failfast; dv->used = 0; dv->next = NULL; *devlistend = dv; @@ -417,6 +420,14 @@ int main(int argc, char *argv[]) writemostly = 2; continue; + case O(MANAGE,FailFast): + case O(CREATE,FailFast): + failfast = 1; + continue; + case O(MANAGE,NoFailFast): + failfast = 2; + continue; + case O(GROW,'z'): case O(CREATE,'z'): case O(BUILD,'z'): /* size */ Index: mdadm-3.3.4/mdadm.h =================================================================== --- mdadm-3.3.4.orig/mdadm.h +++ mdadm-3.3.4/mdadm.h @@ -380,6 +380,8 @@ 
enum special_options { ConfigFile, ChunkSize, WriteMostly, + FailFast, + NoFailFast, Layout, Auto, Force, @@ -513,6 +515,7 @@ struct mddev_dev { * Not set for names read from .config */ char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */ + char failfast; /* Ditto but for 'failfast' flag */ int used; /* set when used */ long long data_offset; struct mddev_dev *next; @@ -818,6 +821,8 @@ extern struct superswitch { * linear-grow-update - now change the size of the array. * writemostly - set the WriteMostly1 bit in the superblock devflags * readwrite - clear the WriteMostly1 bit in the superblock devflags + * failfast - set the FailFast1 bit in the superblock + * nofailfast - clear the FailFast1 bit * no-bitmap - clear any record that a bitmap is present. * bbl - add a bad-block-log if possible * no-bbl - remove any bad-block-log is it is empty. Index: mdadm-3.3.4/super0.c =================================================================== --- mdadm-3.3.4.orig/super0.c +++ mdadm-3.3.4/super0.c @@ -216,19 +216,21 @@ static void examine_super0(struct supert mdp_disk_t *dp; char *dv; char nb[5]; - int wonly; + int wonly, failfast; if (d>=0) dp = &sb->disks[d]; else dp = &sb->this_disk; snprintf(nb, sizeof(nb), "%4d", d); printf("%4s %5d %5d %5d %5d ", d < 0 ? 
"this" : nb, dp->number, dp->major, dp->minor, dp->raid_disk); wonly = dp->state & (1<<MD_DISK_WRITEMOSTLY); - dp->state &= ~(1<<MD_DISK_WRITEMOSTLY); + failfast = dp->state & (1<<MD_DISK_FAILFAST); + dp->state &= ~(wonly | failfast); if (dp->state & (1<<MD_DISK_FAULTY)) printf(" faulty"); if (dp->state & (1<<MD_DISK_ACTIVE)) printf(" active"); if (dp->state & (1<<MD_DISK_SYNC)) printf(" sync"); if (dp->state & (1<<MD_DISK_REMOVED)) printf(" removed"); if (wonly) printf(" write-mostly"); + if (failfast) printf(" failfast"); if (dp->state == 0) printf(" spare"); if ((dv=map_dev(dp->major, dp->minor, 0))) printf(" %s", dv); @@ -557,7 +559,8 @@ static int update_super0(struct supertyp } else if (strcmp(update, "assemble")==0) { int d = info->disk.number; int wonly = sb->disks[d].state & (1<<MD_DISK_WRITEMOSTLY); - int mask = (1<<MD_DISK_WRITEMOSTLY); + int failfast = sb->disks[d].state & (1<<MD_DISK_FAILFAST); + int mask = (1<<MD_DISK_WRITEMOSTLY)|(1<<MD_DISK_FAILFAST); int add = 0; if (sb->minor_version >= 91) /* During reshape we don't insist on everything @@ -566,7 +569,7 @@ static int update_super0(struct supertyp add = (1<<MD_DISK_SYNC); if (((sb->disks[d].state & ~mask) | add) != (unsigned)info->disk.state) { - sb->disks[d].state = info->disk.state | wonly; + sb->disks[d].state = info->disk.state | wonly |failfast; rv = 1; } if (info->reshape_active && Index: mdadm-3.3.4/super1.c =================================================================== --- mdadm-3.3.4.orig/super1.c +++ mdadm-3.3.4/super1.c @@ -77,6 +77,7 @@ struct mdp_superblock_1 { __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ __u8 devflags; /* per-device flags. Only one defined...*/ #define WriteMostly1 1 /* mask for writemostly flag in above */ +#define FailFast1 2 /* Device should get FailFast requests */ /* bad block log. If there are any bad blocks the feature flag is set. * if offset and size are non-zero, that space is reserved and available. 
*/ @@ -428,6 +429,8 @@ static void examine_super1(struct supert printf(" Flags :"); if (sb->devflags & WriteMostly1) printf(" write-mostly"); + if (sb->devflags & FailFast1) + printf(" failfast"); printf("\n"); } @@ -1022,6 +1025,8 @@ static void getinfo_super1(struct supert } if (sb->devflags & WriteMostly1) info->disk.state |= (1 << MD_DISK_WRITEMOSTLY); + if (sb->devflags & FailFast1) + info->disk.state |= (1 << MD_DISK_FAILFAST); info->events = __le64_to_cpu(sb->events); sprintf(info->text_version, "1.%d", st->minor_version); info->safe_mode_delay = 200; @@ -1370,6 +1375,10 @@ static int update_super1(struct supertyp sb->devflags |= WriteMostly1; else if (strcmp(update, "readwrite")==0) sb->devflags &= ~WriteMostly1; + else if (strcmp(update, "failfast") == 0) + sb->devflags |= FailFast1; + else if (strcmp(update, "nofailfast") == 0) + sb->devflags &= ~FailFast1; else rv = -1; @@ -1712,6 +1721,10 @@ static int write_init_super1(struct supe sb->devflags |= WriteMostly1; else sb->devflags &= ~WriteMostly1; + if (di->disk.state & (1<<MD_DISK_FAILFAST)) + sb->devflags |= FailFast1; + else + sb->devflags &= ~FailFast1; if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || read(rfd, sb->device_uuid, 16) != 16) {
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor