diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 21:12:47 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 21:12:47 -0800 |
commit | ac322de6bf5416cb145b58599297b8be73cd86ac (patch) | |
tree | 1a1be9f8b9241159fb4cde14a548eba9a4155b28 /include | |
parent | ccf21b69a83afaee4d5499e0d03eacf23946e08c (diff) | |
parent | 339421def582abb14c2217aa8c8f28bb2e299174 (diff) |
Merge tag 'md/4.4' of git://neil.brown.name/md
Pull md updates from Neil Brown:
"Two major components to this update.
1) The clustered-raid1 support from SUSE is nearly complete. There
are a few outstanding issues being worked on. Maybe half a dozen
patches will bring this to a usable state.
2) The first stage of journalled-raid5 support from Facebook makes an
appearance. With a journal device configured (typically NVRAM or
SSD), the "RAID5 write hole" should be closed - a crash during
degraded operations cannot result in data corruption.
The next stage will be to use the journal as a write-behind cache
so that latency can be reduced and in some cases throughput
increased by performing more full-stripe writes.
* tag 'md/4.4' of git://neil.brown.name/md: (66 commits)
MD: when RAID journal is missing/faulty, block RESTART_ARRAY_RW
MD: set journal disk ->raid_disk
MD: kick out journal disk if it's not fresh
raid5-cache: start raid5 readonly if journal is missing
MD: add new bit to indicate raid array with journal
raid5-cache: IO error handling
raid5: journal disk can't be removed
raid5-cache: add trim support for log
MD: fix info output for journal disk
raid5-cache: use bio chaining
raid5-cache: small log->seq cleanup
raid5-cache: new helper: r5_reserve_log_entry
raid5-cache: inline r5l_alloc_io_unit into r5l_new_meta
raid5-cache: take rdev->data_offset into account early on
raid5-cache: refactor bio allocation
raid5-cache: clean up r5l_get_meta
raid5-cache: simplify state machine when caches flushes are not needed
raid5-cache: factor out a helper to run all stripes for an I/O unit
raid5-cache: rename flushed_ios to finished_ios
raid5-cache: free I/O units earlier
...
Diffstat (limited to 'include')
-rw-r--r-- | include/uapi/linux/raid/md_p.h | 73 |
1 files changed, 72 insertions, 1 deletions
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 2ae6131e69a5..c3e654c6d518 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -89,6 +89,12 @@ * read requests will only be sent here in * dire need */ +#define MD_DISK_JOURNAL 18 /* disk is used as the write journal in RAID-5/6 */ + +#define MD_DISK_ROLE_SPARE 0xffff +#define MD_DISK_ROLE_FAULTY 0xfffe +#define MD_DISK_ROLE_JOURNAL 0xfffd +#define MD_DISK_ROLE_MAX 0xff00 /* max value of regular disk role */ typedef struct mdp_device_descriptor_s { __u32 number; /* 0 Device number in the entire set */ @@ -252,7 +258,10 @@ struct mdp_superblock_1 { __le64 data_offset; /* sector start of data, often 0 */ __le64 data_size; /* sectors in this device that can be used for data */ __le64 super_offset; /* sector start of this superblock */ - __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ + union { + __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ + __le64 journal_tail;/* journal tail of journal device (from data_offset) */ + }; __le32 dev_number; /* permanent identifier of this device - not role in raid */ __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ @@ -302,6 +311,8 @@ struct mdp_superblock_1 { #define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening * is guided by bitmap. */ +#define MD_FEATURE_CLUSTERED 256 /* clustered MD */ +#define MD_FEATURE_JOURNAL 512 /* support write cache */ #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |MD_FEATURE_RECOVERY_OFFSET \ |MD_FEATURE_RESHAPE_ACTIVE \ @@ -310,6 +321,66 @@ struct mdp_superblock_1 { |MD_FEATURE_RESHAPE_BACKWARDS \ |MD_FEATURE_NEW_OFFSET \ |MD_FEATURE_RECOVERY_BITMAP \ + |MD_FEATURE_CLUSTERED \ + |MD_FEATURE_JOURNAL \ ) +struct r5l_payload_header { + __le16 type; + __le16 flags; +} __attribute__ ((__packed__)); + +enum r5l_payload_type { + R5LOG_PAYLOAD_DATA = 0, + R5LOG_PAYLOAD_PARITY = 1, + R5LOG_PAYLOAD_FLUSH = 2, +}; + +struct r5l_payload_data_parity { + struct r5l_payload_header header; + __le32 size; /* sector. data/parity size. each 4k + * has a checksum */ + __le64 location; /* sector. For data, it's raid sector. For + * parity, it's stripe sector */ + __le32 checksum[]; +} __attribute__ ((__packed__)); + +enum r5l_payload_data_parity_flag { + R5LOG_PAYLOAD_FLAG_DISCARD = 1, /* payload is discard */ + /* + * RESHAPED/RESHAPING is only set when there is reshape activity. Note, + * both data/parity of a stripe should have the same flag set + * + * RESHAPED: reshape is running, and this stripe finished reshape + * RESHAPING: reshape is running, and this stripe isn't reshaped + */ + R5LOG_PAYLOAD_FLAG_RESHAPED = 2, + R5LOG_PAYLOAD_FLAG_RESHAPING = 3, +}; + +struct r5l_payload_flush { + struct r5l_payload_header header; + __le32 size; /* flush_stripes size, bytes */ + __le64 flush_stripes[]; +} __attribute__ ((__packed__)); + +enum r5l_payload_flush_flag { + R5LOG_PAYLOAD_FLAG_FLUSH_STRIPE = 1, /* data represents whole stripe */ +}; + +struct r5l_meta_block { + __le32 magic; + __le32 checksum; + __u8 version; + __u8 __zero_pading_1; + __le16 __zero_pading_2; + __le32 meta_size; /* whole size of the block */ + + __le64 seq; + __le64 position; /* sector, start from rdev->data_offset, current position */ + struct r5l_payload_header payloads[]; +} __attribute__ ((__packed__)); + +#define R5LOG_VERSION 0x1 +#define R5LOG_MAGIC 0x6433c509 #endif |