Discussion:
[PATCH 3/9] btrfs: Move the error logging from find_device() to its caller.
(too old to reply)
Goffredo Baroncelli
2018-10-11 18:50:57 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

The caller knows better whether this error is fatal or not, i.e. whether
another disk is available or not.

This is a preparatory patch.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index c9f0c4193..2a87eb103 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -603,12 +603,7 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
if (do_rescan)
grub_device_iterate (find_device_iter, &ctx);
if (!ctx.dev_found)
- {
- grub_error (GRUB_ERR_BAD_FS,
- N_("couldn't find a necessary member device "
- "of multi-device filesystem"));
- return NULL;
- }
+ return NULL;
data->n_devices_attached++;
if (data->n_devices_attached > data->n_devices_allocated)
{
@@ -905,6 +900,9 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
dev = find_device (data, stripe->device_id, j);
if (!dev)
{
+ grub_dprintf ("btrfs",
+ "couldn't find a necessary member device "
+ "of multi-device filesystem\n");
err = grub_errno;
grub_errno = GRUB_ERR_NONE;
continue;
--
2.19.1
Goffredo Baroncelli
2018-10-11 18:50:56 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

This helper is used in a few places to help with debugging. As a
conservative approach, the error is only logged.
This does not impact the error handling.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 933a57d3b..c9f0c4193 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -77,7 +77,8 @@ struct btrfs_header
{
grub_btrfs_checksum_t checksum;
grub_btrfs_uuid_t uuid;
- grub_uint8_t dummy[0x30];
+ grub_uint64_t bytenr;
+ grub_uint8_t dummy[0x28];
grub_uint32_t nitems;
grub_uint8_t level;
} GRUB_PACKED;
@@ -286,6 +287,25 @@ free_iterator (struct grub_btrfs_leaf_descriptor *desc)
grub_free (desc->data);
}

+/*
+ * Sanity-check a tree node header that was read from logical address ADDR.
+ *
+ * Two things are verified: the bytenr recorded in HEADER (little-endian on
+ * disk) must equal ADDR, and the UUID in HEADER must equal the UUID kept in
+ * DATA->sblock.  Each mismatch is logged via grub_dprintf () and reported
+ * through grub_error () with GRUB_ERR_BAD_FS; the value grub_error ()
+ * returns is passed back to the caller.  GRUB_ERR_NONE is returned when
+ * both checks pass.
+ */
+static grub_err_t
+check_btrfs_header (struct grub_btrfs_data *data, struct btrfs_header *header,
+                    grub_disk_addr_t addr)
+{
+  grub_uint64_t recorded_addr = grub_le_to_cpu64 (header->bytenr);
+  int uuid_differs;
+
+  /* The address check comes first; the UUID is not looked at if it fails. */
+  if (recorded_addr != addr)
+    {
+      grub_dprintf ("btrfs", "btrfs_header.bytenr is not equal node addr\n");
+      return grub_error (GRUB_ERR_BAD_FS,
+                         "header bytenr is not equal node addr");
+    }
+
+  uuid_differs = grub_memcmp (data->sblock.uuid, header->uuid,
+                              sizeof(grub_btrfs_uuid_t)) != 0;
+  if (!uuid_differs)
+    return GRUB_ERR_NONE;
+
+  grub_dprintf ("btrfs", "btrfs_header.uuid doesn't match sblock uuid\n");
+  return grub_error (GRUB_ERR_BAD_FS,
+                     "header uuid doesn't match sblock uuid");
+}
+
static grub_err_t
save_ref (struct grub_btrfs_leaf_descriptor *desc,
grub_disk_addr_t addr, unsigned i, unsigned m, int l)
@@ -341,6 +361,7 @@ next (struct grub_btrfs_data *data,

err = grub_btrfs_read_logical (data, grub_le_to_cpu64 (node.addr),
&head, sizeof (head), 0);
+ check_btrfs_header (data, &head, grub_le_to_cpu64 (node.addr));
if (err)
return -err;

@@ -402,6 +423,7 @@ lower_bound (struct grub_btrfs_data *data,
/* FIXME: preread few nodes into buffer. */
err = grub_btrfs_read_logical (data, addr, &head, sizeof (head),
recursion_depth + 1);
+ check_btrfs_header (data, &head, addr);
if (err)
return err;
addr += sizeof (head);
--
2.19.1
Goffredo Baroncelli
2018-10-11 18:50:58 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Currently, a read from a missing device triggers a rescan. However, it is never
recorded that the device is missing. So, each read of a missing device
triggers a rescan again and again. This behavior causes a lot of unneeded
rescans, leading to huge slowdowns.

This patch fixes the above-mentioned issue. Information about missing devices
is stored in the data->devices_attached[] array as a NULL value in the dev
member. A rescan is triggered only if no information is found for a given
device. This means that only the first read triggers a rescan.

The patch drops premature return. This way data->devices_attached[] is
filled even when a given device is missing.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 2a87eb103..b2be80c33 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -588,7 +588,7 @@ find_device_iter (const char *name, void *data)
}

static grub_device_t
-find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
+find_device (struct grub_btrfs_data *data, grub_uint64_t id)
{
struct find_device_ctx ctx = {
.data = data,
@@ -600,10 +600,9 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
for (i = 0; i < data->n_devices_attached; i++)
if (id == data->devices_attached[i].id)
return data->devices_attached[i].dev;
- if (do_rescan)
- grub_device_iterate (find_device_iter, &ctx);
- if (!ctx.dev_found)
- return NULL;
+
+ grub_device_iterate (find_device_iter, &ctx);
+
data->n_devices_attached++;
if (data->n_devices_attached > data->n_devices_allocated)
{
@@ -615,7 +614,8 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
* sizeof (data->devices_attached[0]));
if (!data->devices_attached)
{
- grub_device_close (ctx.dev_found);
+ if (ctx.dev_found)
+ grub_device_close (ctx.dev_found);
data->devices_attached = tmp;
return NULL;
}
@@ -897,7 +897,7 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
" for laddr 0x%" PRIxGRUB_UINT64_T "\n", paddr,
addr);

- dev = find_device (data, stripe->device_id, j);
+ dev = find_device (data, stripe->device_id);
if (!dev)
{
grub_dprintf ("btrfs",
@@ -974,7 +974,8 @@ grub_btrfs_unmount (struct grub_btrfs_data *data)
unsigned i;
/* The device 0 is closed one layer upper. */
for (i = 1; i < data->n_devices_attached; i++)
- grub_device_close (data->devices_attached[i].dev);
+ if (data->devices_attached[i].dev)
+ grub_device_close (data->devices_attached[i].dev);
grub_free (data->devices_attached);
grub_free (data->extent);
grub_free (data);
--
2.19.1
Daniel Kiper
2018-10-17 13:51:28 UTC
Permalink
Post by Goffredo Baroncelli
Currently read from missing device triggers rescan. However, it is never
recorded that the device is missing. So, each read of a missing device
triggers rescan again and again. This behavior causes a lot of unneeded
rescans leading to huge slowdowns.
This patch fixes above mentioned issue. Information about missing devices
is stored in the data->devices_attached[] array as NULL value in dev
member. Rescan is triggered only if no information is found for a given
device. This means that only first time read triggers rescan.
The patch drops premature return. This way data->devices_attached[] is
filled even when a given device is missing.
I changed commit message, so, you should add
Signed-off-by: Daniel Kiper <***@oracle.com>

I simply forgot to tell you about that. Sorry.

And below you can add
Reviewed-by: Daniel Kiper <***@oracle.com>

Daniel
Goffredo Baroncelli
2018-10-11 18:51:01 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Add support for recovery for a RAID 5 btrfs profile. In addition,
some code is added as preparatory work for the RAID 6 recovery code.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 162 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 157 insertions(+), 5 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 899dc32b7..d066d54cc 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -29,6 +29,7 @@
#include <minilzo.h>
#include <grub/i18n.h>
#include <grub/btrfs.h>
+#include <grub/crypto.h>

GRUB_MOD_LICENSE ("GPLv3+");

@@ -665,6 +666,141 @@ btrfs_read_from_chunk (struct grub_btrfs_data *data,
return err;
}

+struct raid56_buffer { /* Per-stripe scratch state used while retrying a RAID 5/6 read. */
+void *buf; /* Receives the bytes read from this stripe's device; allocated by raid56_read_retry (). */
+int data_is_valid; /* Set to 1 only when the read into buf succeeded, 0 otherwise. */
+};
+
+/*
+ * Rebuild RAID 5 data by XOR-ing together all stripes that were read
+ * successfully.
+ *
+ * DEST receives CSIZE bytes: the XOR of every BUFFERS[0..NSTRIPES) entry
+ * whose data_is_valid flag is set (the first valid buffer is copied, the
+ * rest are XOR-ed in).  When every entry is valid there is nothing to
+ * rebuild; this is only logged and DEST is left untouched.
+ */
+static void
+rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
+               grub_uint64_t nstripes, grub_uint64_t csize)
+{
+  grub_uint64_t i;
+  int first;
+
+  /*
+   * Find the first invalid stripe.  The bound must be tested before
+   * buffers[i] is dereferenced: with the operands the other way round the
+   * loop reads buffers[nstripes], one element past the end of the array,
+   * whenever all stripes are valid.
+   */
+  for (i = 0; i < nstripes && buffers[i].data_is_valid; i++);
+
+  if (i == nstripes)
+    {
+      grub_dprintf ("btrfs", "called rebuild_raid5(), but all disks are OK\n");
+      return;
+    }
+
+  grub_dprintf ("btrfs", "rebuilding RAID 5 stripe #%" PRIuGRUB_UINT64_T "\n", i);
+
+  for (i = 0, first = 1; i < nstripes; i++)
+    {
+      if (!buffers[i].data_is_valid)
+        continue;
+
+      if (first)
+        {
+          /* Seed DEST with the first valid stripe ... */
+          grub_memcpy (dest, buffers[i].buf, csize);
+          first = 0;
+        }
+      else
+        /* ... and fold every further valid stripe into it. */
+        grub_crypto_xor (dest, dest, buffers[i].buf, csize);
+    }
+}
+
+/*
+ * Retry a failed RAID 5/6 read by fetching the same range from every stripe.
+ *
+ * For each of the CHUNK's stripes, CSIZE bytes at (stripe offset +
+ * STRIPE_OFFSET) are read from the stripe's device into a scratch buffer.
+ * Stripes whose device cannot be found or whose read fails are counted as
+ * failed.  For a RAID 5 chunk with at most one failed stripe,
+ * rebuild_raid5 () is then called to fill BUF.
+ *
+ * Returns GRUB_ERR_OUT_OF_MEMORY if a scratch buffer cannot be allocated,
+ * GRUB_ERR_READ_ERROR if a RAID 5 chunk has more than one failed stripe,
+ * and GRUB_ERR_NONE otherwise.  For a chunk without the RAID 5 flag the
+ * function only logs that rebuilding is not implemented and still returns
+ * GRUB_ERR_NONE.
+ */
+static grub_err_t
+raid56_read_retry (struct grub_btrfs_data *data,
+                   struct grub_btrfs_chunk_item *chunk,
+                   grub_uint64_t stripe_offset,
+                   grub_uint64_t csize, void *buf)
+{
+  /* The struct grub_btrfs_chunk_stripe array lives behind the chunk item. */
+  struct grub_btrfs_chunk_stripe *stripes =
+    (struct grub_btrfs_chunk_stripe *) (chunk + 1);
+  grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
+  grub_uint64_t chunk_type = grub_le_to_cpu64 (chunk->type);
+  int is_raid5 = (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5) != 0;
+  struct raid56_buffer *buffers;
+  grub_uint64_t failed_devices = 0;
+  grub_uint64_t n;
+  /* Until the scratch buffers exist, the only possible failure is OOM. */
+  grub_err_t ret = GRUB_ERR_OUT_OF_MEMORY;
+
+  buffers = grub_zalloc (sizeof(*buffers) * nstripes);
+  if (!buffers)
+    goto cleanup;
+
+  n = 0;
+  while (n < nstripes)
+    {
+      buffers[n].buf = grub_zalloc (csize);
+      if (!buffers[n].buf)
+        goto cleanup;
+      n++;
+    }
+
+  for (n = 0; n < nstripes; n++)
+    {
+      struct grub_btrfs_chunk_stripe *stripe = &stripes[n];
+      grub_disk_addr_t paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+      grub_device_t dev;
+      int ok = 0;
+
+      grub_dprintf ("btrfs", "reading paddr %" PRIxGRUB_UINT64_T
+                    " from stripe ID %" PRIxGRUB_UINT64_T "\n", paddr,
+                    stripe->device_id);
+
+      /* A missing device and a failed read are treated alike below. */
+      dev = find_device (data, stripe->device_id);
+      if (dev)
+        ok = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+                             paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+                             csize, buffers[n].buf) == GRUB_ERR_NONE;
+
+      buffers[n].data_is_valid = ok;
+      if (ok)
+        {
+          grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " Ok (dev ID %"
+                        PRIxGRUB_UINT64_T ")\n", n, stripe->device_id);
+          continue;
+        }
+
+      grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " FAILED (dev ID %"
+                    PRIxGRUB_UINT64_T ")\n", n, stripe->device_id);
+      failed_devices++;
+    }
+
+  if (is_raid5 && failed_devices > 1)
+    {
+      grub_dprintf ("btrfs",
+                    "not enough disks for RAID 5: total %" PRIuGRUB_UINT64_T
+                    ", missing %" PRIuGRUB_UINT64_T "\n",
+                    nstripes, failed_devices);
+      ret = GRUB_ERR_READ_ERROR;
+      goto cleanup;
+    }
+
+  grub_dprintf ("btrfs",
+                "enough disks for RAID 5 rebuilding: total %"
+                PRIuGRUB_UINT64_T ", missing %" PRIuGRUB_UINT64_T "\n",
+                nstripes, failed_devices);
+
+  /* Enough stripes were read, so rebuild the data. */
+  if (is_raid5)
+    rebuild_raid5 (buf, buffers, nstripes, csize);
+  else
+    grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+
+  ret = GRUB_ERR_NONE;
+
+ cleanup:
+  /* Entries never allocated are still NULL because the array was zeroed. */
+  if (buffers)
+    for (n = 0; n < nstripes; n++)
+      grub_free(buffers[n].buf);
+  grub_free(buffers);
+
+  return ret;
+}
+
static grub_err_t
grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
void *buf, grub_size_t size, int recursion_depth)
@@ -742,6 +878,10 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_uint16_t nstripes;
unsigned redundancy = 1;
unsigned i, j;
+ int is_raid56;
+
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ GRUB_BTRFS_CHUNK_TYPE_RAID5);

if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -921,17 +1061,29 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
addr);

- for (i = 0; i < redundancy; i++)
+ if (!is_raid56)
+ for (i = 0; i < redundancy; i++)
+ {
+ err = btrfs_read_from_chunk (data, chunk, stripen,
+ stripe_offset,
+ i, /* redundancy */
+ csize, buf);
+ if (!err)
+ break;
+ grub_errno = GRUB_ERR_NONE;
+ }
+ else
{
err = btrfs_read_from_chunk (data, chunk, stripen,
stripe_offset,
- i, /* redundancy */
+ 0, /* no mirror */
csize, buf);
- if (!err)
- break;
grub_errno = GRUB_ERR_NONE;
+ if (err)
+ err = raid56_read_retry (data, chunk, stripe_offset,
+ csize, buf);
}
- if (i != redundancy)
+ if (!err)
break;
}
if (err)
--
2.19.1
Daniel Kiper
2018-10-17 14:14:14 UTC
Permalink
Post by Goffredo Baroncelli
Add support for recovery for a RAID 5 btrfs profile. In addition
it is added some code as preparatory work for RAID 6 recovery code.
---
grub-core/fs/btrfs.c | 162 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 157 insertions(+), 5 deletions(-)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 899dc32b7..d066d54cc 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -29,6 +29,7 @@
#include <minilzo.h>
#include <grub/i18n.h>
#include <grub/btrfs.h>
+#include <grub/crypto.h>
GRUB_MOD_LICENSE ("GPLv3+");
@@ -665,6 +666,141 @@ btrfs_read_from_chunk (struct grub_btrfs_data *data,
return err;
}
+struct raid56_buffer {
+ void *buf;
+ int data_is_valid;
+};
+
+static void
+rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
+ grub_uint64_t nstripes, grub_uint64_t csize)
+{
+ grub_uint64_t i;
+ int first;
+
+ for(i = 0; buffers[i].data_is_valid && i < nstripes; i++);
+
+ if (i == nstripes)
+ {
+ grub_dprintf ("btrfs", "called rebuild_raid5(), but all disks are OK\n");
+ return;
+ }
+
+ grub_dprintf ("btrfs", "rebuilding RAID 5 stripe #%" PRIuGRUB_UINT64_T "\n", i);
+
+ for (i = 0, first = 1; i < nstripes; i++)
+ {
+ if (!buffers[i].data_is_valid)
+ continue;
+
+ if (first) {
+ grub_memcpy(dest, buffers[i].buf, csize);
+ first = 0;
+ } else
+ grub_crypto_xor (dest, dest, buffers[i].buf, csize);
+
Please drop this empty line.
Post by Goffredo Baroncelli
+ }
+}
+
+static grub_err_t
+raid56_read_retry (struct grub_btrfs_data *data,
+ struct grub_btrfs_chunk_item *chunk,
+ grub_uint64_t stripe_offset,
+ grub_uint64_t csize, void *buf)
+{
+ struct raid56_buffer *buffers;
+ grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
+ grub_uint64_t chunk_type = grub_le_to_cpu64 (chunk->type);
+ grub_err_t ret = GRUB_ERR_OUT_OF_MEMORY;
+ grub_uint64_t i, failed_devices;
+
+ buffers = grub_zalloc (sizeof(*buffers) * nstripes);
+ if (!buffers)
+ goto cleanup;
+
+ for (i = 0; i < nstripes; i++)
+ {
+ buffers[i].buf = grub_zalloc (csize);
+ if (!buffers[i].buf)
+ goto cleanup;
+ }
+
+ for (failed_devices = 0, i = 0; i < nstripes; i++)
+ {
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err;
+
+ /* after the struct grub_btrfs_chunk_item, there is an array of
+ struct grub_btrfs_chunk_stripe */
/* Struct grub_btrfs_chunk_stripe lives behind struct grub_btrfs_chunk_item. */
Post by Goffredo Baroncelli
+ stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1) + i;
+
+ paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+ grub_dprintf ("btrfs", "reading paddr %" PRIxGRUB_UINT64_T
+ " from stripe ID %" PRIxGRUB_UINT64_T "\n", paddr,
+ stripe->device_id);
+
+ dev = find_device (data, stripe->device_id);
+ if (!dev)
+ {
+ buffers[i].data_is_valid = 0;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " FAILED (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ failed_devices++;
+ continue;
+ }
+
+ err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+ paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+ csize, buffers[i].buf);
+ if (err == GRUB_ERR_NONE)
+ {
+ buffers[i].data_is_valid = 1;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " Ok (dev ID %"
s/Ok/OK/
Post by Goffredo Baroncelli
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ }
+ else
+ {
+ buffers[i].data_is_valid = 0;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T
+ " FAILED (dev ID %" PRIxGRUB_UINT64_T ")\n", i,
+ stripe->device_id);
+ failed_devices++;
+ }
+ }
+
+ if (failed_devices > 1 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for RAID 5: total %" PRIuGRUB_UINT64_T
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
+ else
+ grub_dprintf ("btrfs",
+ "enough disks for RAID 5 rebuilding: total %"
s/enough disks for RAID 5 rebuilding/enough disks for RAID 5/
Post by Goffredo Baroncelli
+ PRIuGRUB_UINT64_T ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+
+ /* if these are enough, try to rebuild the data */
/* We have enough disks. So, rebuild the data. */
Post by Goffredo Baroncelli
+ if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ rebuild_raid5 (buf, buffers, nstripes, csize);
+ else
+ grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+
+ ret = GRUB_ERR_NONE;
+ if (buffers)
+ for (i = 0; i < nstripes; i++)
+ grub_free(buffers[i].buf);
Lack of space between function name and "(".
Post by Goffredo Baroncelli
+ grub_free(buffers);
Ditto.
Post by Goffredo Baroncelli
+ return ret;
+}
+
static grub_err_t
grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
void *buf, grub_size_t size, int recursion_depth)
@@ -742,6 +878,10 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_uint16_t nstripes;
unsigned redundancy = 1;
unsigned i, j;
+ int is_raid56;
+
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ GRUB_BTRFS_CHUNK_TYPE_RAID5);
if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -921,17 +1061,29 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
addr);
- for (i = 0; i < redundancy; i++)
+ if (!is_raid56)
Why not "if (is_raid56)"? I asked about that earlier. Please change
this if and of course code below. It will be much easier to read. And
you do not need curly brackets for for loop after else.
Post by Goffredo Baroncelli
+ for (i = 0; i < redundancy; i++)
+ {
+ err = btrfs_read_from_chunk (data, chunk, stripen,
+ stripe_offset,
+ i, /* redundancy */
+ csize, buf);
+ if (!err)
+ break;
+ grub_errno = GRUB_ERR_NONE;
+ }
+ else
{
err = btrfs_read_from_chunk (data, chunk, stripen,
stripe_offset,
- i, /* redundancy */
+ 0, /* no mirror */
csize, buf);
- if (!err)
- break;
grub_errno = GRUB_ERR_NONE;
+ if (err)
+ err = raid56_read_retry (data, chunk, stripe_offset,
+ csize, buf);
}
- if (i != redundancy)
+ if (!err)
break;
}
if (err)
Daniel
Goffredo Baroncelli
2018-10-17 19:03:58 UTC
Permalink
Post by Daniel Kiper
Post by Goffredo Baroncelli
Add support for recovery for a RAID 5 btrfs profile. In addition
it is added some code as preparatory work for RAID 6 recovery code.
---
[...]
Post by Daniel Kiper
Post by Goffredo Baroncelli
+
+ for (failed_devices = 0, i = 0; i < nstripes; i++)
+ {
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err;
+
+ /* after the struct grub_btrfs_chunk_item, there is an array of
+ struct grub_btrfs_chunk_stripe */
/* Struct grub_btrfs_chunk_stripe lives behind struct grub_btrfs_chunk_item. */
What about

/* The struct grub_btrfs_chunk_stripe array lives behind struct grub_btrfs_chunk_item. */

[...]
Post by Daniel Kiper
Post by Goffredo Baroncelli
@@ -921,17 +1061,29 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
addr);
- for (i = 0; i < redundancy; i++)
+ if (!is_raid56)
Why not "if (is_raid56)"? I asked about that earlier. Please change
this if and of course code below. It will be much easier to read. And
you do not need curly brackets for for loop after else.
Frankly speaking, I don't see any problem with having an if (!...). However, I will update the code as you requested, hoping to speed up this patch set.

[...]
--
gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
Key fingerprint BBF5 1610 0B64 DAC6 5F7D 17B2 0EDA 9B37 8B82 E0B5
Daniel Kiper
2018-10-18 09:59:00 UTC
Permalink
Post by Goffredo Baroncelli
Post by Daniel Kiper
Post by Goffredo Baroncelli
Add support for recovery for a RAID 5 btrfs profile. In addition
it is added some code as preparatory work for RAID 6 recovery code.
---
[...]
Post by Daniel Kiper
Post by Goffredo Baroncelli
+
+ for (failed_devices = 0, i = 0; i < nstripes; i++)
+ {
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err;
+
+ /* after the struct grub_btrfs_chunk_item, there is an array of
+ struct grub_btrfs_chunk_stripe */
/* Struct grub_btrfs_chunk_stripe lives behind struct grub_btrfs_chunk_item. */
What about
/* The struct grub_btrfs_chunk_stripe array lives behind struct grub_btrfs_chunk_item. */
Works for me.
Post by Goffredo Baroncelli
[...]
Post by Daniel Kiper
Post by Goffredo Baroncelli
@@ -921,17 +1061,29 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
addr);
- for (i = 0; i < redundancy; i++)
+ if (!is_raid56)
Why not "if (is_raid56)"? I asked about that earlier. Please change
this if and of course code below. It will be much easier to read. And
you do not need curly brackets for for loop after else.
Frankly speaking I don't see any problem having a if (!...). However I
update the code as your request, hoping to speedup this patch set
OK, it works. However, if you have "else" below then I think that it is
more natural to drop "!" here. If you would not have else I would not
complain. Well, because it would not make sense to do so... :-)))

Daniel
Goffredo Baroncelli
2018-10-11 18:51:02 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

The original code which handles the recovery of a RAID 6 disk array
assumes that all reads are a multiple of 1 << GRUB_DISK_SECTOR_BITS and it
assumes that all the I/O is done via the struct grub_diskfilter_segment.
This is not true for the btrfs code. In order to reuse the native
grub_raid6_recover() code, it is modified to not call
grub_diskfilter_read_node() directly, but to call a handler passed
as an argument.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/disk/raid6_recover.c | 52 ++++++++++++++++++++++------------
include/grub/diskfilter.h | 9 ++++++
2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/grub-core/disk/raid6_recover.c b/grub-core/disk/raid6_recover.c
index aa674f6ca..0cf691ddf 100644
--- a/grub-core/disk/raid6_recover.c
+++ b/grub-core/disk/raid6_recover.c
@@ -74,14 +74,26 @@ mod_255 (unsigned x)
}

static grub_err_t
-grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
- char *buf, grub_disk_addr_t sector, grub_size_t size)
+raid6_recover_read_node (void *data, int disknr,
+ grub_uint64_t sector,
+ void *buf, grub_size_t size)
+{
+ struct grub_diskfilter_segment *array = data;
+
+ return grub_diskfilter_read_node (&array->nodes[disknr],
+ (grub_disk_addr_t)sector,
+ size >> GRUB_DISK_SECTOR_BITS, buf);
+}
+
+grub_err_t
+grub_raid6_recover_gen (void *data, grub_uint64_t nstripes, int disknr, int p,
+ char *buf, grub_uint64_t sector, grub_size_t size,
+ int layout, raid_recover_read_t read_func)
{
int i, q, pos;
int bad1 = -1, bad2 = -1;
char *pbuf = 0, *qbuf = 0;

- size <<= GRUB_DISK_SECTOR_BITS;
pbuf = grub_zalloc (size);
if (!pbuf)
goto quit;
@@ -91,17 +103,17 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
goto quit;

q = p + 1;
- if (q == (int) array->node_count)
+ if (q == (int) nstripes)
q = 0;

pos = q + 1;
- if (pos == (int) array->node_count)
+ if (pos == (int) nstripes)
pos = 0;

- for (i = 0; i < (int) array->node_count - 2; i++)
+ for (i = 0; i < (int) nstripes - 2; i++)
{
int c;
- if (array->layout & GRUB_RAID_LAYOUT_MUL_FROM_POS)
+ if (layout & GRUB_RAID_LAYOUT_MUL_FROM_POS)
c = pos;
else
c = i;
@@ -109,8 +121,7 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
bad1 = c;
else
{
- if (! grub_diskfilter_read_node (&array->nodes[pos], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (!read_func(data, pos, sector, buf, size))
{
grub_crypto_xor (pbuf, pbuf, buf, size);
grub_raid_block_mulx (c, buf, size);
@@ -128,7 +139,7 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
}

pos++;
- if (pos == (int) array->node_count)
+ if (pos == (int) nstripes)
pos = 0;
}

@@ -139,16 +150,14 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
if (bad2 < 0)
{
/* One bad device */
- if ((! grub_diskfilter_read_node (&array->nodes[p], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf)))
+ if (!read_func(data, p, sector, buf, size))
{
grub_crypto_xor (buf, buf, pbuf, size);
goto quit;
}

grub_errno = GRUB_ERR_NONE;
- if (grub_diskfilter_read_node (&array->nodes[q], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, q, sector, buf, size))
goto quit;

grub_crypto_xor (buf, buf, qbuf, size);
@@ -160,14 +169,12 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
/* Two bad devices */
unsigned c;

- if (grub_diskfilter_read_node (&array->nodes[p], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, p, sector, buf, size))
goto quit;

grub_crypto_xor (pbuf, pbuf, buf, size);

- if (grub_diskfilter_read_node (&array->nodes[q], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, q, sector, buf, size))
goto quit;

grub_crypto_xor (qbuf, qbuf, buf, size);
@@ -190,6 +197,15 @@ quit:
return grub_errno;
}

+/*
+ * Diskfilter entry point for RAID 6 recovery.
+ *
+ * Forwards to grub_raid6_recover_gen () with the segment ARRAY as the
+ * opaque read context, ARRAY's node count and layout, and
+ * raid6_recover_read_node () as the read callback.  SIZE is shifted left
+ * by GRUB_DISK_SECTOR_BITS before being passed on.
+ */
+static grub_err_t
+grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
+                    char *buf, grub_disk_addr_t sector, grub_size_t size)
+{
+  grub_size_t shifted_size = size << GRUB_DISK_SECTOR_BITS;
+
+  return grub_raid6_recover_gen (array, array->node_count, disknr, p, buf,
+                                 sector, shifted_size, array->layout,
+                                 raid6_recover_read_node);
+}
+
GRUB_MOD_INIT(raid6rec)
{
grub_raid6_init_table ();
diff --git a/include/grub/diskfilter.h b/include/grub/diskfilter.h
index d89273c1b..8deb1a8c3 100644
--- a/include/grub/diskfilter.h
+++ b/include/grub/diskfilter.h
@@ -189,6 +189,15 @@ typedef grub_err_t (*grub_raid6_recover_func_t) (struct grub_diskfilter_segment
extern grub_raid5_recover_func_t grub_raid5_recover_func;
extern grub_raid6_recover_func_t grub_raid6_recover_func;

+typedef grub_err_t (* raid_recover_read_t)(void *data, int disk_nr,
+ grub_uint64_t addr, void *dest,
+ grub_size_t size);
+
+extern grub_err_t
+grub_raid6_recover_gen (void *data, grub_uint64_t nstripes, int disknr, int p,
+ char *buf, grub_uint64_t sector, grub_size_t size,
+ int layout, raid_recover_read_t read_func);
+
grub_err_t grub_diskfilter_vg_register (struct grub_diskfilter_vg *vg);

grub_err_t
--
2.19.1
Goffredo Baroncelli
2018-10-11 18:50:55 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Signed-off-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 73 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index be195448d..933a57d3b 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -119,6 +119,8 @@ struct grub_btrfs_chunk_item
#define GRUB_BTRFS_CHUNK_TYPE_RAID1 0x10
#define GRUB_BTRFS_CHUNK_TYPE_DUPLICATED 0x20
#define GRUB_BTRFS_CHUNK_TYPE_RAID10 0x40
+#define GRUB_BTRFS_CHUNK_TYPE_RAID5 0x80
+#define GRUB_BTRFS_CHUNK_TYPE_RAID6 0x100
grub_uint8_t dummy2[0xc];
grub_uint16_t nstripes;
grub_uint16_t nsubstripes;
@@ -764,6 +766,77 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
stripe_offset = low + chunk_stripe_length
* high;
csize = chunk_stripe_length - low;
+ break;
+ }
+ case GRUB_BTRFS_CHUNK_TYPE_RAID5:
+ case GRUB_BTRFS_CHUNK_TYPE_RAID6:
+ {
+ grub_uint64_t nparities, stripe_nr, high, low;
+
+ redundancy = 1; /* no redundancy for now */
+
+ if (grub_le_to_cpu64 (chunk->type) & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ {
+ grub_dprintf ("btrfs", "RAID5\n");
+ nparities = 1;
+ }
+ else
+ {
+ grub_dprintf ("btrfs", "RAID6\n");
+ nparities = 2;
+ }
+
+ /*
+ * RAID 6 layout consists of several stripes spread over
+ * the disks, e.g.:
+ *
+ * Disk_0 Disk_1 Disk_2 Disk_3
+ * A0 B0 P0 Q0
+ * Q1 A1 B1 P1
+ * P2 Q2 A2 B2
+ *
+ * Note: placement of the parities depends on the row number.
+ *
+ * Pay attention that the btrfs terminology may differ from
+ * terminology used in other RAID implementations, e.g. LVM,
+ * dm or md. The main difference is that btrfs calls contiguous
+ * block of data on a given disk, e.g. A0, stripe instead of chunk.
+ *
+ * The variables listed below have following meaning:
+ * - stripe_nr is the stripe number excluding the parities
+ * (A0 = 0, B0 = 1, A1 = 2, B1 = 3, etc.),
+ * - high is the row number (0 for A0...Q0, 1 for Q1...P1, etc.),
+ * - stripen is the disk number in a row (0 for A0, Q1, P2,
+ * 1 for B0, A1, Q2, etc.),
+ * - off is the logical address to read,
+ * - chunk_stripe_length is the size of a stripe (typically 64 KiB),
+ * - nstripes is the number of disks in a row,
+ * - low is the offset of the data inside a stripe,
+ * - stripe_offset is the data offset in an array,
+ * - csize is the "potential" data to read; it will be reduced
+ * to size if the latter is smaller,
+ * - nparities is the number of parities (1 for RAID 5, 2 for
+ * RAID 6); used only in RAID 5/6 code.
+ */
+ stripe_nr = grub_divmod64 (off, chunk_stripe_length, &low);
+
+ /*
+ * stripen is computed without the parities
+ * (0 for A0, A1, A2, 1 for B0, B1, B2, etc.).
+ */
+ high = grub_divmod64 (stripe_nr, nstripes - nparities, &stripen);
+
+ /*
+ * The stripes are spread over the disks. In every row their
+ * positions are shifted by 1 place. So, the real disk numbers
+ * change. Hence, we have to take the current row number modulo
+ * nstripes into account (0 for A0, 1 for A1, 2 for A2, etc.).
+ */
+ grub_divmod64 (high + stripen, nstripes, &stripen);
+
+ stripe_offset = low + chunk_stripe_length * high;
+ csize = chunk_stripe_length - low;
+
break;
}
default:
--
2.19.1
Daniel Kiper
2018-10-17 13:46:09 UTC
Permalink
One nit pick below...
Post by Goffredo Baroncelli
---
grub-core/fs/btrfs.c | 73 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index be195448d..933a57d3b 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -119,6 +119,8 @@ struct grub_btrfs_chunk_item
#define GRUB_BTRFS_CHUNK_TYPE_RAID1 0x10
#define GRUB_BTRFS_CHUNK_TYPE_DUPLICATED 0x20
#define GRUB_BTRFS_CHUNK_TYPE_RAID10 0x40
+#define GRUB_BTRFS_CHUNK_TYPE_RAID5 0x80
+#define GRUB_BTRFS_CHUNK_TYPE_RAID6 0x100
grub_uint8_t dummy2[0xc];
grub_uint16_t nstripes;
grub_uint16_t nsubstripes;
@@ -764,6 +766,77 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
stripe_offset = low + chunk_stripe_length
* high;
csize = chunk_stripe_length - low;
+ break;
+ }
+ {
+ grub_uint64_t nparities, stripe_nr, high, low;
+
+ redundancy = 1; /* no redundancy for now */
+
+ if (grub_le_to_cpu64 (chunk->type) & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ {
+ grub_dprintf ("btrfs", "RAID5\n");
+ nparities = 1;
+ }
+ else
+ {
+ grub_dprintf ("btrfs", "RAID6\n");
+ nparities = 2;
+ }
+
+ /*
+ * RAID 6 layout consists of several stripes spread over
+ *
+ * Disk_0 Disk_1 Disk_2 Disk_3
+ * A0 B0 P0 Q0
+ * Q1 A1 B1 P1
+ * P2 Q2 A2 B2
+ *
+ * Note: placement of the parities depend on row number.
+ *
+ * Pay attention that the btrfs terminology may differ from
+ * terminology used in other RAID implementations, e.g. LVM,
+ * dm or md. The main difference is that btrfs calls contiguous
+ * block of data on a given disk, e.g. A0, stripe instead of chunk.
+ *
+ * - stripe_nr is the stripe number excluding the parities
+ * (A0 = 0, B0 = 1, A1 = 2, B1 = 3, etc.),
+ * - high is the row number (0 for A0...Q0, 1 for Q1...P1, etc.),
+ * - stripen is the disk number in a row (0 for A0, Q1, P2,
+ * 1 for B0, A1, Q2, etc.),
+ * - off is the logical address to read,
+ * - chunk_stripe_length is the size of a stripe (typically 64 KiB),
+ * - nstripes is the number of disks in a row,
+ * - low is the offset of the data inside a stripe,
+ * - stripe_offset is the data offset in an array,
+ * - csize is the "potential" data to read; it will be reduced
+ * to size if the latter is smaller,
+ * - nparities is the number of parities (1 for RAID 5, 2 for
+ * RAID 6); used only in RAID 5/6 code.
+ */
+ stripe_nr = grub_divmod64 (off, chunk_stripe_length, &low);
+
+ /*
+ * stripen is computed without the parities
+ * (0 for A0, A1, A2, 1 for B0, B1, B2, etc.).
+ */
+ high = grub_divmod64 (stripe_nr, nstripes - nparities, &stripen);
+
+ /*
+ * The stripes are spread over the disks. Every each row their
+ * positions are shifted by 1 place. So, the real disks number
+ * change. Hence, we have to take current row number modulo
+ * nstripes into account (0 for A0, 1 for A1, 2 for A2, etc.).
s/current row number modulo nstripes into account/into account current row number modulo nstripes/

Daniel
Goffredo Baroncelli
2018-10-11 18:50:59 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

A portion of the logging code is moved outside of the internal for(;;) loop.
The part that is left inside is the one which depends on the loop index.

This is a preparatory patch. The next one will refactor the code inside
the for(;;) into another function.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index b2be80c33..a82211ccc 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -870,6 +870,18 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

for (j = 0; j < 2; j++)
{
+ grub_dprintf ("btrfs", "chunk 0x%" PRIxGRUB_UINT64_T
+ "+0x%" PRIxGRUB_UINT64_T
+ " (%d stripes (%d substripes) of %"
+ PRIxGRUB_UINT64_T ")\n",
+ grub_le_to_cpu64 (key->offset),
+ grub_le_to_cpu64 (chunk->size),
+ grub_le_to_cpu16 (chunk->nstripes),
+ grub_le_to_cpu16 (chunk->nsubstripes),
+ grub_le_to_cpu64 (chunk->stripe_length));
+ grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
+ addr);
+
for (i = 0; i < redundancy; i++)
{
struct grub_btrfs_chunk_stripe *stripe;
@@ -882,20 +894,11 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;

- grub_dprintf ("btrfs", "chunk 0x%" PRIxGRUB_UINT64_T
- "+0x%" PRIxGRUB_UINT64_T
- " (%d stripes (%d substripes) of %"
- PRIxGRUB_UINT64_T ") stripe %" PRIxGRUB_UINT64_T
+ grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
" maps to 0x%" PRIxGRUB_UINT64_T "\n",
- grub_le_to_cpu64 (key->offset),
- grub_le_to_cpu64 (chunk->size),
- grub_le_to_cpu16 (chunk->nstripes),
- grub_le_to_cpu16 (chunk->nsubstripes),
- grub_le_to_cpu64 (chunk->stripe_length),
stripen, stripe->offset);
grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T
- " for laddr 0x%" PRIxGRUB_UINT64_T "\n", paddr,
- addr);
+ "\n", paddr);

dev = find_device (data, stripe->device_id);
if (!dev)
--
2.19.1
Goffredo Baroncelli
2018-10-11 18:51:00 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Move the code in charge of reading the data from disk into a separate
function. This helps to separate the error handling logic (which depends on
the different RAID profiles) from the read-from-disk logic.
Refactoring this code increases the general readability too.

This is a preparatory patch that makes it easier to add the RAID 5/6
recovery code.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 75 ++++++++++++++++++++++++++------------------
1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index a82211ccc..899dc32b7 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -625,6 +625,46 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id)
return ctx.dev_found;
}

+static grub_err_t
+btrfs_read_from_chunk (struct grub_btrfs_data *data,
+ struct grub_btrfs_chunk_item *chunk,
+ grub_uint64_t stripen, grub_uint64_t stripe_offset,
+ int redundancy, grub_uint64_t csize,
+ void *buf)
+{
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err;
+
+ stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
+ /* Right now the redundancy handling is easy.
+ With RAID5-like it will be more difficult. */
+ stripe += stripen + redundancy;
+
+ paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+
+ grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
+ " maps to 0x%" PRIxGRUB_UINT64_T
+ ". Reading paddr 0x%" PRIxGRUB_UINT64_T "\n",
+ stripen, stripe->offset, paddr);
+
+ dev = find_device (data, stripe->device_id);
+ if (!dev)
+ {
+ grub_dprintf ("btrfs",
+ "couldn't find a necessary member device "
+ "of multi-device filesystem\n");
+ grub_errno = GRUB_ERR_NONE;
+ return GRUB_ERR_READ_ERROR;
+ }
+
+ err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+ paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+ csize, buf);
+ return err;
+}
+
static grub_err_t
grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
void *buf, grub_size_t size, int recursion_depth)
@@ -638,7 +678,6 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_err_t err = 0;
struct grub_btrfs_key key_out;
int challoc = 0;
- grub_device_t dev;
struct grub_btrfs_key key_in;
grub_size_t chsize;
grub_disk_addr_t chaddr;
@@ -884,36 +923,10 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

for (i = 0; i < redundancy; i++)
{
- struct grub_btrfs_chunk_stripe *stripe;
- grub_disk_addr_t paddr;
-
- stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
- /* Right now the redundancy handling is easy.
- With RAID5-like it will be more difficult. */
- stripe += stripen + i;
-
- paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
-
- grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
- " maps to 0x%" PRIxGRUB_UINT64_T "\n",
- stripen, stripe->offset);
- grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T
- "\n", paddr);
-
- dev = find_device (data, stripe->device_id);
- if (!dev)
- {
- grub_dprintf ("btrfs",
- "couldn't find a necessary member device "
- "of multi-device filesystem\n");
- err = grub_errno;
- grub_errno = GRUB_ERR_NONE;
- continue;
- }
-
- err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
- paddr & (GRUB_DISK_SECTOR_SIZE - 1),
- csize, buf);
+ err = btrfs_read_from_chunk (data, chunk, stripen,
+ stripe_offset,
+ i, /* redundancy */
+ csize, buf);
if (!err)
break;
grub_errno = GRUB_ERR_NONE;
--
2.19.1
Goffredo Baroncelli
2018-10-11 18:51:03 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Add the RAID 6 recovery, in order to use a RAID 6 filesystem even if some
disks (up to two) are missing. This code uses the md RAID 6 code already
present in GRUB.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 60 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index d066d54cc..d20ee09e4 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -30,6 +30,7 @@
#include <grub/i18n.h>
#include <grub/btrfs.h>
#include <grub/crypto.h>
+#include <grub/diskfilter.h>

GRUB_MOD_LICENSE ("GPLv3+");

@@ -702,11 +703,36 @@ rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
}
}

+static grub_err_t
+raid6_recover_read_buffer (void *data, int disk_nr,
+ grub_uint64_t addr __attribute__ ((unused)),
+ void *dest, grub_size_t size)
+{
+ struct raid56_buffer *buffers = data;
+
+ if (!buffers[disk_nr].data_is_valid)
+ return grub_errno = GRUB_ERR_READ_ERROR;
+
+ grub_memcpy(dest, buffers[disk_nr].buf, size);
+
+ return grub_errno = GRUB_ERR_NONE;
+}
+
+static void
+rebuild_raid6 (struct raid56_buffer *buffers, grub_uint64_t nstripes,
+ grub_uint64_t csize, grub_uint64_t parities_pos, void *dest,
+ grub_uint64_t stripen)
+
+{
+ grub_raid6_recover_gen (buffers, nstripes, stripen, parities_pos,
+ dest, 0, csize, 0, raid6_recover_read_buffer);
+}
+
static grub_err_t
raid56_read_retry (struct grub_btrfs_data *data,
struct grub_btrfs_chunk_item *chunk,
- grub_uint64_t stripe_offset,
- grub_uint64_t csize, void *buf)
+ grub_uint64_t stripe_offset, grub_uint64_t stripen,
+ grub_uint64_t csize, void *buf, grub_uint64_t parities_pos)
{
struct raid56_buffer *buffers;
grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
@@ -779,6 +805,15 @@ raid56_read_retry (struct grub_btrfs_data *data,
ret = GRUB_ERR_READ_ERROR;
goto cleanup;
}
+ else if (failed_devices > 2 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID6))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for raid6: total %" PRIuGRUB_UINT64_T
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
else
grub_dprintf ("btrfs",
"enough disks for RAID 5 rebuilding: total %"
@@ -789,7 +824,7 @@ raid56_read_retry (struct grub_btrfs_data *data,
if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
rebuild_raid5 (buf, buffers, nstripes, csize);
else
- grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+ rebuild_raid6 (buffers, nstripes, csize, parities_pos, buf, stripen);

ret = GRUB_ERR_NONE;
cleanup:
@@ -879,9 +914,11 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
unsigned redundancy = 1;
unsigned i, j;
int is_raid56;
+ grub_uint64_t parities_pos = 0;

- is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
- GRUB_BTRFS_CHUNK_TYPE_RAID5);
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ (GRUB_BTRFS_CHUNK_TYPE_RAID5 |
+ GRUB_BTRFS_CHUNK_TYPE_RAID6));

if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -1030,6 +1067,17 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
*/
grub_divmod64 (high + stripen, nstripes, &stripen);

+ /*
+ * parities_pos is equal to "(high - nparities) % nstripes"
+ * (see the diagram above).
+ * However "high - nparities" might be negative (eg when high
+ * == 0) leading to an incorrect computation.
+ * Instead "high + nstripes - nparities" is always positive and
+ * in modulo nstripes is equal to "(high - nparities) % nstripes"
+ */
+ grub_divmod64 (high + nstripes - nparities, nstripes,
+ &parities_pos);
+
stripe_offset = low + chunk_stripe_length * high;
csize = chunk_stripe_length - low;

@@ -1081,7 +1129,7 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_errno = GRUB_ERR_NONE;
if (err)
err = raid56_read_retry (data, chunk, stripe_offset,
- csize, buf);
+ stripen, csize, buf, parities_pos);
}
if (!err)
break;
--
2.19.1
Daniel Kiper
2018-10-17 14:23:10 UTC
Permalink
Post by Goffredo Baroncelli
Add the RAID 6 recovery, in order to use a RAID 6 filesystem even if some
disks (up to two) are missing. This code use the md RAID 6 code already
present in grub.
---
grub-core/fs/btrfs.c | 60 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 54 insertions(+), 6 deletions(-)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index d066d54cc..d20ee09e4 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -30,6 +30,7 @@
#include <grub/i18n.h>
#include <grub/btrfs.h>
#include <grub/crypto.h>
+#include <grub/diskfilter.h>
GRUB_MOD_LICENSE ("GPLv3+");
@@ -702,11 +703,36 @@ rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
}
}
+static grub_err_t
+raid6_recover_read_buffer (void *data, int disk_nr,
+ grub_uint64_t addr __attribute__ ((unused)),
+ void *dest, grub_size_t size)
+{
+ struct raid56_buffer *buffers = data;
+
+ if (!buffers[disk_nr].data_is_valid)
+ return grub_errno = GRUB_ERR_READ_ERROR;
+
+ grub_memcpy(dest, buffers[disk_nr].buf, size);
+
+ return grub_errno = GRUB_ERR_NONE;
+}
+
+static void
+rebuild_raid6 (struct raid56_buffer *buffers, grub_uint64_t nstripes,
+ grub_uint64_t csize, grub_uint64_t parities_pos, void *dest,
+ grub_uint64_t stripen)
+
+{
+ grub_raid6_recover_gen (buffers, nstripes, stripen, parities_pos,
+ dest, 0, csize, 0, raid6_recover_read_buffer);
+}
+
static grub_err_t
raid56_read_retry (struct grub_btrfs_data *data,
struct grub_btrfs_chunk_item *chunk,
- grub_uint64_t stripe_offset,
- grub_uint64_t csize, void *buf)
+ grub_uint64_t stripe_offset, grub_uint64_t stripen,
+ grub_uint64_t csize, void *buf, grub_uint64_t parities_pos)
{
struct raid56_buffer *buffers;
grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
@@ -779,6 +805,15 @@ raid56_read_retry (struct grub_btrfs_data *data,
ret = GRUB_ERR_READ_ERROR;
goto cleanup;
}
+ else if (failed_devices > 2 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID6))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for raid6: total %" PRIuGRUB_UINT64_T
s/not enough disks for raid6/not enough disks for RAID 6/

You are using "RAID 5" in an earlier patch, so please be consistent
and use "RAID 6" here.
Post by Goffredo Baroncelli
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
else
grub_dprintf ("btrfs",
"enough disks for RAID 5 rebuilding: total %"
@@ -789,7 +824,7 @@ raid56_read_retry (struct grub_btrfs_data *data,
if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
rebuild_raid5 (buf, buffers, nstripes, csize);
else
- grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+ rebuild_raid6 (buffers, nstripes, csize, parities_pos, buf, stripen);
ret = GRUB_ERR_NONE;
@@ -879,9 +914,11 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
unsigned redundancy = 1;
unsigned i, j;
int is_raid56;
+ grub_uint64_t parities_pos = 0;
- is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
- GRUB_BTRFS_CHUNK_TYPE_RAID5);
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ (GRUB_BTRFS_CHUNK_TYPE_RAID5 |
+ GRUB_BTRFS_CHUNK_TYPE_RAID6));
if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -1030,6 +1067,17 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
*/
grub_divmod64 (high + stripen, nstripes, &stripen);
+ /*
+ * parities_pos is equal to "(high - nparities) % nstripes"
+ * (see the diagram above).
+ * However "high - nparities" might be negative (eg when high
+ * == 0) leading to an incorrect computation.
However, "high - nparities" can be negative, e.g. when high == 0,
leading to incorrect results.
Post by Goffredo Baroncelli
+ * Instead "high + nstripes - nparities" is always positive and
+ * in modulo nstripes is equal to "(high - nparities) % nstripes"
"high + nstripes - nparities" is always positive and modulo
nstripes is equal to "(high - nparities) % nstripes".

If you change above mentioned things you can retain my
Reviewed-by: Daniel Kiper <***@oracle.com>

Daniel
Goffredo Baroncelli
2018-10-18 17:55:33 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Signed-off-by: Daniel Kiper <***@oracle.com>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 73 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index be195448d..9122169aa 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -119,6 +119,8 @@ struct grub_btrfs_chunk_item
#define GRUB_BTRFS_CHUNK_TYPE_RAID1 0x10
#define GRUB_BTRFS_CHUNK_TYPE_DUPLICATED 0x20
#define GRUB_BTRFS_CHUNK_TYPE_RAID10 0x40
+#define GRUB_BTRFS_CHUNK_TYPE_RAID5 0x80
+#define GRUB_BTRFS_CHUNK_TYPE_RAID6 0x100
grub_uint8_t dummy2[0xc];
grub_uint16_t nstripes;
grub_uint16_t nsubstripes;
@@ -764,6 +766,77 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
stripe_offset = low + chunk_stripe_length
* high;
csize = chunk_stripe_length - low;
+ break;
+ }
+ case GRUB_BTRFS_CHUNK_TYPE_RAID5:
+ case GRUB_BTRFS_CHUNK_TYPE_RAID6:
+ {
+ grub_uint64_t nparities, stripe_nr, high, low;
+
+ redundancy = 1; /* no redundancy for now */
+
+ if (grub_le_to_cpu64 (chunk->type) & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ {
+ grub_dprintf ("btrfs", "RAID5\n");
+ nparities = 1;
+ }
+ else
+ {
+ grub_dprintf ("btrfs", "RAID6\n");
+ nparities = 2;
+ }
+
+ /*
+ * RAID 6 layout consists of several stripes spread over
+ * the disks, e.g.:
+ *
+ * Disk_0 Disk_1 Disk_2 Disk_3
+ * A0 B0 P0 Q0
+ * Q1 A1 B1 P1
+ * P2 Q2 A2 B2
+ *
+ * Note: placement of the parities depend on row number.
+ *
+ * Pay attention that the btrfs terminology may differ from
+ * terminology used in other RAID implementations, e.g. LVM,
+ * dm or md. The main difference is that btrfs calls contiguous
+ * block of data on a given disk, e.g. A0, stripe instead of chunk.
+ *
+ * The variables listed below have following meaning:
+ * - stripe_nr is the stripe number excluding the parities
+ * (A0 = 0, B0 = 1, A1 = 2, B1 = 3, etc.),
+ * - high is the row number (0 for A0...Q0, 1 for Q1...P1, etc.),
+ * - stripen is the disk number in a row (0 for A0, Q1, P2,
+ * 1 for B0, A1, Q2, etc.),
+ * - off is the logical address to read,
+ * - chunk_stripe_length is the size of a stripe (typically 64 KiB),
+ * - nstripes is the number of disks in a row,
+ * - low is the offset of the data inside a stripe,
+ * - stripe_offset is the data offset in an array,
+ * - csize is the "potential" data to read; it will be reduced
+ * to size if the latter is smaller,
+ * - nparities is the number of parities (1 for RAID 5, 2 for
+ * RAID 6); used only in RAID 5/6 code.
+ */
+ stripe_nr = grub_divmod64 (off, chunk_stripe_length, &low);
+
+ /*
+ * stripen is computed without the parities
+ * (0 for A0, A1, A2, 1 for B0, B1, B2, etc.).
+ */
+ high = grub_divmod64 (stripe_nr, nstripes - nparities, &stripen);
+
+ /*
+ * The stripes are spread over the disks. Every each row their
+ * positions are shifted by 1 place. So, the real disks number
+ * change. Hence, we have to take into account current row number
+ * modulo nstripes (0 for A0, 1 for A1, 2 for A2, etc.).
+ */
+ grub_divmod64 (high + stripen, nstripes, &stripen);
+
+ stripe_offset = low + chunk_stripe_length * high;
+ csize = chunk_stripe_length - low;
+
break;
}
default:
--
2.19.1
Goffredo Baroncelli
2018-10-18 17:55:35 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

The caller knows better if this error is fatal or not, i.e. another disk is
available or not.

This is a preparatory patch.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 0cbf3551a..6b6e91cd1 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -603,12 +603,7 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
if (do_rescan)
grub_device_iterate (find_device_iter, &ctx);
if (!ctx.dev_found)
- {
- grub_error (GRUB_ERR_BAD_FS,
- N_("couldn't find a necessary member device "
- "of multi-device filesystem"));
- return NULL;
- }
+ return NULL;
data->n_devices_attached++;
if (data->n_devices_attached > data->n_devices_allocated)
{
@@ -905,6 +900,9 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
dev = find_device (data, stripe->device_id, j);
if (!dev)
{
+ grub_dprintf ("btrfs",
+ "couldn't find a necessary member device "
+ "of multi-device filesystem\n");
err = grub_errno;
grub_errno = GRUB_ERR_NONE;
continue;
--
2.19.1
Goffredo Baroncelli
2018-10-18 17:55:41 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Add the RAID 6 recovery, in order to use a RAID 6 filesystem even if some
disks (up to two) are missing. This code uses the md RAID 6 code already
present in GRUB.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 60 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index b277f2904..9419d313d 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -30,6 +30,7 @@
#include <grub/i18n.h>
#include <grub/btrfs.h>
#include <grub/crypto.h>
+#include <grub/diskfilter.h>

GRUB_MOD_LICENSE ("GPLv3+");

@@ -701,11 +702,36 @@ rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
}
}

+static grub_err_t
+raid6_recover_read_buffer (void *data, int disk_nr,
+ grub_uint64_t addr __attribute__ ((unused)),
+ void *dest, grub_size_t size)
+{
+ struct raid56_buffer *buffers = data;
+
+ if (!buffers[disk_nr].data_is_valid)
+ return grub_errno = GRUB_ERR_READ_ERROR;
+
+ grub_memcpy(dest, buffers[disk_nr].buf, size);
+
+ return grub_errno = GRUB_ERR_NONE;
+}
+
+static void
+rebuild_raid6 (struct raid56_buffer *buffers, grub_uint64_t nstripes,
+ grub_uint64_t csize, grub_uint64_t parities_pos, void *dest,
+ grub_uint64_t stripen)
+
+{
+ grub_raid6_recover_gen (buffers, nstripes, stripen, parities_pos,
+ dest, 0, csize, 0, raid6_recover_read_buffer);
+}
+
static grub_err_t
raid56_read_retry (struct grub_btrfs_data *data,
struct grub_btrfs_chunk_item *chunk,
- grub_uint64_t stripe_offset,
- grub_uint64_t csize, void *buf)
+ grub_uint64_t stripe_offset, grub_uint64_t stripen,
+ grub_uint64_t csize, void *buf, grub_uint64_t parities_pos)
{
struct raid56_buffer *buffers;
grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
@@ -778,6 +804,15 @@ raid56_read_retry (struct grub_btrfs_data *data,
ret = GRUB_ERR_READ_ERROR;
goto cleanup;
}
+ else if (failed_devices > 2 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID6))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for RAID 6: total %" PRIuGRUB_UINT64_T
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
else
grub_dprintf ("btrfs",
"enough disks for RAID 5: total %"
@@ -788,7 +823,7 @@ raid56_read_retry (struct grub_btrfs_data *data,
if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
rebuild_raid5 (buf, buffers, nstripes, csize);
else
- grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+ rebuild_raid6 (buffers, nstripes, csize, parities_pos, buf, stripen);

ret = GRUB_ERR_NONE;
cleanup:
@@ -878,9 +913,11 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
unsigned redundancy = 1;
unsigned i, j;
int is_raid56;
+ grub_uint64_t parities_pos = 0;

- is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
- GRUB_BTRFS_CHUNK_TYPE_RAID5);
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ (GRUB_BTRFS_CHUNK_TYPE_RAID5 |
+ GRUB_BTRFS_CHUNK_TYPE_RAID6));

if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -1029,6 +1066,17 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
*/
grub_divmod64 (high + stripen, nstripes, &stripen);

+ /*
+ * parities_pos is equal to "(high - nparities) % nstripes"
+ * (see the diagram above).
+ * However "high - nparities" can be negative, eg. when high
+ * == 0 leading to an incorrect computation.
+ * "high + nstripes - nparities" is always positive and in
+ * modulo nstripes is equal to "(high - nparities) % nstripes"
+ */
+ grub_divmod64 (high + nstripes - nparities, nstripes,
+ &parities_pos);
+
stripe_offset = low + chunk_stripe_length * high;
csize = chunk_stripe_length - low;

@@ -1069,7 +1117,7 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_errno = GRUB_ERR_NONE;
if (err)
err = raid56_read_retry (data, chunk, stripe_offset,
- csize, buf);
+ stripen, csize, buf, parities_pos);
}
else
for (i = 0; i < redundancy; i++)
--
2.19.1
Goffredo Baroncelli
2018-10-18 17:55:34 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

This helper is used in a few places to help with debugging. As a
conservative approach, the error is only logged.
This does not impact the error handling.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 9122169aa..0cbf3551a 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -77,7 +77,8 @@ struct btrfs_header
{
grub_btrfs_checksum_t checksum;
grub_btrfs_uuid_t uuid;
- grub_uint8_t dummy[0x30];
+ grub_uint64_t bytenr;
+ grub_uint8_t dummy[0x28];
grub_uint32_t nitems;
grub_uint8_t level;
} GRUB_PACKED;
@@ -286,6 +287,25 @@ free_iterator (struct grub_btrfs_leaf_descriptor *desc)
grub_free (desc->data);
}

+static grub_err_t
+check_btrfs_header (struct grub_btrfs_data *data, struct btrfs_header *header,
+ grub_disk_addr_t addr)
+{
+ if (grub_le_to_cpu64 (header->bytenr) != addr)
+ {
+ grub_dprintf ("btrfs", "btrfs_header.bytenr is not equal node addr\n");
+ return grub_error (GRUB_ERR_BAD_FS,
+ "header bytenr is not equal node addr");
+ }
+ if (grub_memcmp (data->sblock.uuid, header->uuid, sizeof(grub_btrfs_uuid_t)))
+ {
+ grub_dprintf ("btrfs", "btrfs_header.uuid doesn't match sblock uuid\n");
+ return grub_error (GRUB_ERR_BAD_FS,
+ "header uuid doesn't match sblock uuid");
+ }
+ return GRUB_ERR_NONE;
+}
+
static grub_err_t
save_ref (struct grub_btrfs_leaf_descriptor *desc,
grub_disk_addr_t addr, unsigned i, unsigned m, int l)
@@ -341,6 +361,7 @@ next (struct grub_btrfs_data *data,

err = grub_btrfs_read_logical (data, grub_le_to_cpu64 (node.addr),
&head, sizeof (head), 0);
+ check_btrfs_header (data, &head, grub_le_to_cpu64 (node.addr));
if (err)
return -err;

@@ -402,6 +423,7 @@ lower_bound (struct grub_btrfs_data *data,
/* FIXME: preread few nodes into buffer. */
err = grub_btrfs_read_logical (data, addr, &head, sizeof (head),
recursion_depth + 1);
+ check_btrfs_header (data, &head, addr);
if (err)
return err;
addr += sizeof (head);
--
2.19.1
Goffredo Baroncelli
2018-10-18 17:55:37 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

A portion of the logging code is moved outside of the internal for(;;) loop.
The part that is left inside is the one which depends on the loop index.

This is a preparatory patch. The next one will refactor the code inside
the for(;;) into another function.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 81f3bc120..dde0edd03 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -870,6 +870,18 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

for (j = 0; j < 2; j++)
{
+ grub_dprintf ("btrfs", "chunk 0x%" PRIxGRUB_UINT64_T
+ "+0x%" PRIxGRUB_UINT64_T
+ " (%d stripes (%d substripes) of %"
+ PRIxGRUB_UINT64_T ")\n",
+ grub_le_to_cpu64 (key->offset),
+ grub_le_to_cpu64 (chunk->size),
+ grub_le_to_cpu16 (chunk->nstripes),
+ grub_le_to_cpu16 (chunk->nsubstripes),
+ grub_le_to_cpu64 (chunk->stripe_length));
+ grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
+ addr);
+
for (i = 0; i < redundancy; i++)
{
struct grub_btrfs_chunk_stripe *stripe;
@@ -882,20 +894,11 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;

- grub_dprintf ("btrfs", "chunk 0x%" PRIxGRUB_UINT64_T
- "+0x%" PRIxGRUB_UINT64_T
- " (%d stripes (%d substripes) of %"
- PRIxGRUB_UINT64_T ") stripe %" PRIxGRUB_UINT64_T
+ grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
" maps to 0x%" PRIxGRUB_UINT64_T "\n",
- grub_le_to_cpu64 (key->offset),
- grub_le_to_cpu64 (chunk->size),
- grub_le_to_cpu16 (chunk->nstripes),
- grub_le_to_cpu16 (chunk->nsubstripes),
- grub_le_to_cpu64 (chunk->stripe_length),
stripen, stripe->offset);
grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T
- " for laddr 0x%" PRIxGRUB_UINT64_T "\n", paddr,
- addr);
+ "\n", paddr);

dev = find_device (data, stripe->device_id);
if (!dev)
--
2.19.1
Goffredo Baroncelli
2018-10-18 17:55:36 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Currently, a read from a missing device triggers a rescan. However, it is
never recorded that the device is missing. So, each read of a missing device
triggers a rescan again and again. This behavior causes a lot of unneeded
rescans, leading to huge slowdowns.

This patch fixes the above-mentioned issue. Information about missing devices
is stored in the data->devices_attached[] array as a NULL value in the dev
member. A rescan is triggered only if no information is found for a given
device. This means that only the first read triggers a rescan.

The patch drops the premature return. This way data->devices_attached[] is
filled even when a given device is missing.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Signed-off-by: Daniel Kiper <***@oracle.com>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 6b6e91cd1..81f3bc120 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -588,7 +588,7 @@ find_device_iter (const char *name, void *data)
}

static grub_device_t
-find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
+find_device (struct grub_btrfs_data *data, grub_uint64_t id)
{
struct find_device_ctx ctx = {
.data = data,
@@ -600,10 +600,9 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
for (i = 0; i < data->n_devices_attached; i++)
if (id == data->devices_attached[i].id)
return data->devices_attached[i].dev;
- if (do_rescan)
- grub_device_iterate (find_device_iter, &ctx);
- if (!ctx.dev_found)
- return NULL;
+
+ grub_device_iterate (find_device_iter, &ctx);
+
data->n_devices_attached++;
if (data->n_devices_attached > data->n_devices_allocated)
{
@@ -615,7 +614,8 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
* sizeof (data->devices_attached[0]));
if (!data->devices_attached)
{
- grub_device_close (ctx.dev_found);
+ if (ctx.dev_found)
+ grub_device_close (ctx.dev_found);
data->devices_attached = tmp;
return NULL;
}
@@ -897,7 +897,7 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
" for laddr 0x%" PRIxGRUB_UINT64_T "\n", paddr,
addr);

- dev = find_device (data, stripe->device_id, j);
+ dev = find_device (data, stripe->device_id);
if (!dev)
{
grub_dprintf ("btrfs",
@@ -974,7 +974,8 @@ grub_btrfs_unmount (struct grub_btrfs_data *data)
unsigned i;
/* The device 0 is closed one layer upper. */
for (i = 1; i < data->n_devices_attached; i++)
- grub_device_close (data->devices_attached[i].dev);
+ if (data->devices_attached[i].dev)
+ grub_device_close (data->devices_attached[i].dev);
grub_free (data->devices_attached);
grub_free (data->extent);
grub_free (data);
--
2.19.1
Goffredo Baroncelli
2018-10-18 17:55:38 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Move the code in charge of reading the data from disk into a separate
function. This helps to separate the error handling logic (which depends on
the different raid profiles) from the read from disk logic.
Refactoring this code increases the general readability too.

This is a preparatory patch that eases adding the RAID 5/6 recovery
code.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 75 ++++++++++++++++++++++++++------------------
1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index dde0edd03..ea97f0502 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -625,6 +625,46 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id)
return ctx.dev_found;
}

+static grub_err_t
+btrfs_read_from_chunk (struct grub_btrfs_data *data,
+ struct grub_btrfs_chunk_item *chunk,
+ grub_uint64_t stripen, grub_uint64_t stripe_offset,
+ int redundancy, grub_uint64_t csize,
+ void *buf)
+{
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err;
+
+ stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
+ /* Right now the redundancy handling is easy.
+ With RAID5-like it will be more difficult. */
+ stripe += stripen + redundancy;
+
+ paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+
+ grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
+ " maps to 0x%" PRIxGRUB_UINT64_T
+ ". Reading paddr 0x%" PRIxGRUB_UINT64_T "\n",
+ stripen, stripe->offset, paddr);
+
+ dev = find_device (data, stripe->device_id);
+ if (!dev)
+ {
+ grub_dprintf ("btrfs",
+ "couldn't find a necessary member device "
+ "of multi-device filesystem\n");
+ grub_errno = GRUB_ERR_NONE;
+ return GRUB_ERR_READ_ERROR;
+ }
+
+ err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+ paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+ csize, buf);
+ return err;
+}
+
static grub_err_t
grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
void *buf, grub_size_t size, int recursion_depth)
@@ -638,7 +678,6 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_err_t err = 0;
struct grub_btrfs_key key_out;
int challoc = 0;
- grub_device_t dev;
struct grub_btrfs_key key_in;
grub_size_t chsize;
grub_disk_addr_t chaddr;
@@ -884,36 +923,10 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

for (i = 0; i < redundancy; i++)
{
- struct grub_btrfs_chunk_stripe *stripe;
- grub_disk_addr_t paddr;
-
- stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
- /* Right now the redundancy handling is easy.
- With RAID5-like it will be more difficult. */
- stripe += stripen + i;
-
- paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
-
- grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
- " maps to 0x%" PRIxGRUB_UINT64_T "\n",
- stripen, stripe->offset);
- grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T
- "\n", paddr);
-
- dev = find_device (data, stripe->device_id);
- if (!dev)
- {
- grub_dprintf ("btrfs",
- "couldn't find a necessary member device "
- "of multi-device filesystem\n");
- err = grub_errno;
- grub_errno = GRUB_ERR_NONE;
- continue;
- }
-
- err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
- paddr & (GRUB_DISK_SECTOR_SIZE - 1),
- csize, buf);
+ err = btrfs_read_from_chunk (data, chunk, stripen,
+ stripe_offset,
+ i, /* redundancy */
+ csize, buf);
if (!err)
break;
grub_errno = GRUB_ERR_NONE;
--
2.19.1
Goffredo Baroncelli
2018-10-18 17:55:40 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

The original code which handles the recovery of a RAID 6 disk array
assumes that all reads are multiples of 1 << GRUB_DISK_SECTOR_BITS and it
assumes that all the I/O is done via the struct grub_diskfilter_segment.
This is not true for the btrfs code. In order to reuse the native
grub_raid6_recover() code, it is modified to not call
grub_diskfilter_read_node() directly, but to call a handler passed
as an argument.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/disk/raid6_recover.c | 52 ++++++++++++++++++++++------------
include/grub/diskfilter.h | 9 ++++++
2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/grub-core/disk/raid6_recover.c b/grub-core/disk/raid6_recover.c
index aa674f6ca..0cf691ddf 100644
--- a/grub-core/disk/raid6_recover.c
+++ b/grub-core/disk/raid6_recover.c
@@ -74,14 +74,26 @@ mod_255 (unsigned x)
}

static grub_err_t
-grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
- char *buf, grub_disk_addr_t sector, grub_size_t size)
+raid6_recover_read_node (void *data, int disknr,
+ grub_uint64_t sector,
+ void *buf, grub_size_t size)
+{
+ struct grub_diskfilter_segment *array = data;
+
+ return grub_diskfilter_read_node (&array->nodes[disknr],
+ (grub_disk_addr_t)sector,
+ size >> GRUB_DISK_SECTOR_BITS, buf);
+}
+
+grub_err_t
+grub_raid6_recover_gen (void *data, grub_uint64_t nstripes, int disknr, int p,
+ char *buf, grub_uint64_t sector, grub_size_t size,
+ int layout, raid_recover_read_t read_func)
{
int i, q, pos;
int bad1 = -1, bad2 = -1;
char *pbuf = 0, *qbuf = 0;

- size <<= GRUB_DISK_SECTOR_BITS;
pbuf = grub_zalloc (size);
if (!pbuf)
goto quit;
@@ -91,17 +103,17 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
goto quit;

q = p + 1;
- if (q == (int) array->node_count)
+ if (q == (int) nstripes)
q = 0;

pos = q + 1;
- if (pos == (int) array->node_count)
+ if (pos == (int) nstripes)
pos = 0;

- for (i = 0; i < (int) array->node_count - 2; i++)
+ for (i = 0; i < (int) nstripes - 2; i++)
{
int c;
- if (array->layout & GRUB_RAID_LAYOUT_MUL_FROM_POS)
+ if (layout & GRUB_RAID_LAYOUT_MUL_FROM_POS)
c = pos;
else
c = i;
@@ -109,8 +121,7 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
bad1 = c;
else
{
- if (! grub_diskfilter_read_node (&array->nodes[pos], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (!read_func(data, pos, sector, buf, size))
{
grub_crypto_xor (pbuf, pbuf, buf, size);
grub_raid_block_mulx (c, buf, size);
@@ -128,7 +139,7 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
}

pos++;
- if (pos == (int) array->node_count)
+ if (pos == (int) nstripes)
pos = 0;
}

@@ -139,16 +150,14 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
if (bad2 < 0)
{
/* One bad device */
- if ((! grub_diskfilter_read_node (&array->nodes[p], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf)))
+ if (!read_func(data, p, sector, buf, size))
{
grub_crypto_xor (buf, buf, pbuf, size);
goto quit;
}

grub_errno = GRUB_ERR_NONE;
- if (grub_diskfilter_read_node (&array->nodes[q], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, q, sector, buf, size))
goto quit;

grub_crypto_xor (buf, buf, qbuf, size);
@@ -160,14 +169,12 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
/* Two bad devices */
unsigned c;

- if (grub_diskfilter_read_node (&array->nodes[p], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, p, sector, buf, size))
goto quit;

grub_crypto_xor (pbuf, pbuf, buf, size);

- if (grub_diskfilter_read_node (&array->nodes[q], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, q, sector, buf, size))
goto quit;

grub_crypto_xor (qbuf, qbuf, buf, size);
@@ -190,6 +197,15 @@ quit:
return grub_errno;
}

+static grub_err_t
+grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
+ char *buf, grub_disk_addr_t sector, grub_size_t size)
+{
+ return grub_raid6_recover_gen (array, array->node_count, disknr, p, buf,
+ sector, size << GRUB_DISK_SECTOR_BITS,
+ array->layout, raid6_recover_read_node);
+}
+
GRUB_MOD_INIT(raid6rec)
{
grub_raid6_init_table ();
diff --git a/include/grub/diskfilter.h b/include/grub/diskfilter.h
index d89273c1b..8deb1a8c3 100644
--- a/include/grub/diskfilter.h
+++ b/include/grub/diskfilter.h
@@ -189,6 +189,15 @@ typedef grub_err_t (*grub_raid6_recover_func_t) (struct grub_diskfilter_segment
extern grub_raid5_recover_func_t grub_raid5_recover_func;
extern grub_raid6_recover_func_t grub_raid6_recover_func;

+typedef grub_err_t (* raid_recover_read_t)(void *data, int disk_nr,
+ grub_uint64_t addr, void *dest,
+ grub_size_t size);
+
+extern grub_err_t
+grub_raid6_recover_gen (void *data, grub_uint64_t nstripes, int disknr, int p,
+ char *buf, grub_uint64_t sector, grub_size_t size,
+ int layout, raid_recover_read_t read_func);
+
grub_err_t grub_diskfilter_vg_register (struct grub_diskfilter_vg *vg);

grub_err_t
--
2.19.1
Goffredo Baroncelli
2018-10-18 17:55:39 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Add support for recovery of a RAID 5 btrfs profile. In addition,
add some code as preparatory work for the RAID 6 recovery code.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 161 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 156 insertions(+), 5 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index ea97f0502..b277f2904 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -29,6 +29,7 @@
#include <minilzo.h>
#include <grub/i18n.h>
#include <grub/btrfs.h>
+#include <grub/crypto.h>

GRUB_MOD_LICENSE ("GPLv3+");

@@ -665,6 +666,140 @@ btrfs_read_from_chunk (struct grub_btrfs_data *data,
return err;
}

+struct raid56_buffer {
+ void *buf;
+ int data_is_valid;
+};
+
+static void
+rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
+ grub_uint64_t nstripes, grub_uint64_t csize)
+{
+ grub_uint64_t i;
+ int first;
+
+ for(i = 0; buffers[i].data_is_valid && i < nstripes; i++);
+
+ if (i == nstripes)
+ {
+ grub_dprintf ("btrfs", "called rebuild_raid5(), but all disks are OK\n");
+ return;
+ }
+
+ grub_dprintf ("btrfs", "rebuilding RAID 5 stripe #%" PRIuGRUB_UINT64_T "\n", i);
+
+ for (i = 0, first = 1; i < nstripes; i++)
+ {
+ if (!buffers[i].data_is_valid)
+ continue;
+
+ if (first) {
+ grub_memcpy(dest, buffers[i].buf, csize);
+ first = 0;
+ } else
+ grub_crypto_xor (dest, dest, buffers[i].buf, csize);
+ }
+}
+
+static grub_err_t
+raid56_read_retry (struct grub_btrfs_data *data,
+ struct grub_btrfs_chunk_item *chunk,
+ grub_uint64_t stripe_offset,
+ grub_uint64_t csize, void *buf)
+{
+ struct raid56_buffer *buffers;
+ grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
+ grub_uint64_t chunk_type = grub_le_to_cpu64 (chunk->type);
+ grub_err_t ret = GRUB_ERR_OUT_OF_MEMORY;
+ grub_uint64_t i, failed_devices;
+
+ buffers = grub_zalloc (sizeof(*buffers) * nstripes);
+ if (!buffers)
+ goto cleanup;
+
+ for (i = 0; i < nstripes; i++)
+ {
+ buffers[i].buf = grub_zalloc (csize);
+ if (!buffers[i].buf)
+ goto cleanup;
+ }
+
+ for (failed_devices = 0, i = 0; i < nstripes; i++)
+ {
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err;
+
+ /* The struct grub_btrfs_chunk_stripe array lives behind struct
+ grub_btrfs_chunk_item. */
+ stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1) + i;
+
+ paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+ grub_dprintf ("btrfs", "reading paddr %" PRIxGRUB_UINT64_T
+ " from stripe ID %" PRIxGRUB_UINT64_T "\n", paddr,
+ stripe->device_id);
+
+ dev = find_device (data, stripe->device_id);
+ if (!dev)
+ {
+ buffers[i].data_is_valid = 0;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " FAILED (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ failed_devices++;
+ continue;
+ }
+
+ err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+ paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+ csize, buffers[i].buf);
+ if (err == GRUB_ERR_NONE)
+ {
+ buffers[i].data_is_valid = 1;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " OK (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ }
+ else
+ {
+ buffers[i].data_is_valid = 0;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T
+ " FAILED (dev ID %" PRIxGRUB_UINT64_T ")\n", i,
+ stripe->device_id);
+ failed_devices++;
+ }
+ }
+
+ if (failed_devices > 1 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for RAID 5: total %" PRIuGRUB_UINT64_T
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
+ else
+ grub_dprintf ("btrfs",
+ "enough disks for RAID 5: total %"
+ PRIuGRUB_UINT64_T ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+
+ /* We have enough disks. So, rebuild the data. */
+ if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ rebuild_raid5 (buf, buffers, nstripes, csize);
+ else
+ grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+
+ ret = GRUB_ERR_NONE;
+ cleanup:
+ if (buffers)
+ for (i = 0; i < nstripes; i++)
+ grub_free (buffers[i].buf);
+ grub_free (buffers);
+
+ return ret;
+}
+
static grub_err_t
grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
void *buf, grub_size_t size, int recursion_depth)
@@ -742,6 +877,10 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_uint16_t nstripes;
unsigned redundancy = 1;
unsigned i, j;
+ int is_raid56;
+
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ GRUB_BTRFS_CHUNK_TYPE_RAID5);

if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -921,17 +1060,29 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
addr);

- for (i = 0; i < redundancy; i++)
+ if (is_raid56)
{
err = btrfs_read_from_chunk (data, chunk, stripen,
stripe_offset,
- i, /* redundancy */
+ 0, /* no mirror */
csize, buf);
- if (!err)
- break;
grub_errno = GRUB_ERR_NONE;
+ if (err)
+ err = raid56_read_retry (data, chunk, stripe_offset,
+ csize, buf);
}
- if (i != redundancy)
+ else
+ for (i = 0; i < redundancy; i++)
+ {
+ err = btrfs_read_from_chunk (data, chunk, stripen,
+ stripe_offset,
+ i, /* redundancy */
+ csize, buf);
+ if (!err)
+ break;
+ grub_errno = GRUB_ERR_NONE;
+ }
+ if (!err)
break;
}
if (err)
--
2.19.1
Daniel Kiper
2018-10-22 10:04:03 UTC
Permalink
Post by Goffredo Baroncelli
Add support for recovery for a RAID 5 btrfs profile. In addition
it is added some code as preparatory work for RAID 6 recovery code.
---
grub-core/fs/btrfs.c | 161 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 156 insertions(+), 5 deletions(-)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index ea97f0502..b277f2904 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -29,6 +29,7 @@
#include <minilzo.h>
#include <grub/i18n.h>
#include <grub/btrfs.h>
+#include <grub/crypto.h>
GRUB_MOD_LICENSE ("GPLv3+");
@@ -665,6 +666,140 @@ btrfs_read_from_chunk (struct grub_btrfs_data *data,
return err;
}
+struct raid56_buffer {
+ void *buf;
+ int data_is_valid;
+};
+
+static void
+rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
+ grub_uint64_t nstripes, grub_uint64_t csize)
+{
+ grub_uint64_t i;
+ int first;
+
+ for(i = 0; buffers[i].data_is_valid && i < nstripes; i++);
+
+ if (i == nstripes)
+ {
+ grub_dprintf ("btrfs", "called rebuild_raid5(), but all disks are OK\n");
+ return;
+ }
+
+ grub_dprintf ("btrfs", "rebuilding RAID 5 stripe #%" PRIuGRUB_UINT64_T "\n", i);
+
+ for (i = 0, first = 1; i < nstripes; i++)
+ {
+ if (!buffers[i].data_is_valid)
+ continue;
+
+ if (first) {
+ grub_memcpy(dest, buffers[i].buf, csize);
+ first = 0;
+ } else
+ grub_crypto_xor (dest, dest, buffers[i].buf, csize);
+ }
+}
+
+static grub_err_t
+raid56_read_retry (struct grub_btrfs_data *data,
+ struct grub_btrfs_chunk_item *chunk,
+ grub_uint64_t stripe_offset,
+ grub_uint64_t csize, void *buf)
+{
+ struct raid56_buffer *buffers;
+ grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
+ grub_uint64_t chunk_type = grub_le_to_cpu64 (chunk->type);
+ grub_err_t ret = GRUB_ERR_OUT_OF_MEMORY;
+ grub_uint64_t i, failed_devices;
+
+ buffers = grub_zalloc (sizeof(*buffers) * nstripes);
+ if (!buffers)
+ goto cleanup;
+
+ for (i = 0; i < nstripes; i++)
+ {
+ buffers[i].buf = grub_zalloc (csize);
+ if (!buffers[i].buf)
+ goto cleanup;
+ }
+
+ for (failed_devices = 0, i = 0; i < nstripes; i++)
+ {
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err;
+
+ /* The struct grub_btrfs_chunk_stripe array lives behind struct
+ grub_btrfs_chunk_item. */
+ stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1) + i;
+
+ paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+ grub_dprintf ("btrfs", "reading paddr %" PRIxGRUB_UINT64_T
+ " from stripe ID %" PRIxGRUB_UINT64_T "\n", paddr,
+ stripe->device_id);
+
+ dev = find_device (data, stripe->device_id);
+ if (!dev)
+ {
+ buffers[i].data_is_valid = 0;
You do not need this and...
Post by Goffredo Baroncelli
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " FAILED (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ failed_devices++;
+ continue;
+ }
+
+ err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+ paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+ csize, buffers[i].buf);
+ if (err == GRUB_ERR_NONE)
+ {
+ buffers[i].data_is_valid = 1;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " OK (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ }
+ else
+ {
+ buffers[i].data_is_valid = 0;
...this. grub_zalloc() above did work for you.
Post by Goffredo Baroncelli
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T
+ " FAILED (dev ID %" PRIxGRUB_UINT64_T ")\n", i,
s/FAILED/READ FAILED/

Otherwise Reviewed-by: Daniel Kiper <***@oracle.com>

Daniel
Daniel Kiper
2018-10-22 11:02:45 UTC
Permalink
Hi All,
the aim of this patches set is to provide support for a BTRFS raid5/6
filesystem in GRUB.
The first patch, implements the basic support for raid5/6. I.e this works when
all the disks are present.
The next 5 patches, are preparatory ones.
The 7th patch implements the raid5 recovery for btrfs (i.e. handling the
disappearing of 1 disk).
The 8th patch makes the code for handling the raid6 recovery more generic.
The last one implements the raid6 recovery for btrfs (i.e. handling the
disappearing up to two disks).
I tested the code in grub-emu, and it works both with all the disks,
and with some disks missing. I checked the crc32 calculated from grub and
from linux and these matched. Finally I checked if the support for md raid6
still works properly, and it does (with all drives and with up to 2 drives
missing)
Comments are welcome.
In general whole patch series LGTM. +/- some nit picks including changes
for patch #7. If you are OK with them and there are no objections then
I will apply the patches in a week or so.

Thank you for doing the work.

Daniel
Goffredo Baroncelli
2018-10-22 17:30:39 UTC
Permalink
Post by Daniel Kiper
Hi All,
the aim of this patches set is to provide support for a BTRFS raid5/6
filesystem in GRUB.
[...]
Post by Daniel Kiper
In general whole patch series LGTM. +/- some nit picks including changes
for patch #7. If you are OK with them and there are no objections then
I will apply the patches in a week or so.
Good news; I will update the patch 7 and I will send patches set v11


BR
G.Baroncelli
Post by Daniel Kiper
Thank you for doing the work.
Daniel
_______________________________________________
Grub-devel mailing list
https://lists.gnu.org/mailman/listinfo/grub-devel
--
gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
Key fingerprint BBF5 1610 0B64 DAC6 5F7D 17B2 0EDA 9B37 8B82 E0B5
Nick Terrell
2018-10-22 19:49:40 UTC
Permalink
Post by Daniel Kiper
Hi All,
the aim of this patches set is to provide support for a BTRFS raid5/6
filesystem in GRUB.
The first patch, implements the basic support for raid5/6. I.e this works when
all the disks are present.
The next 5 patches, are preparatory ones.
The 7th patch implements the raid5 recovery for btrfs (i.e. handling the
disappearing of 1 disk).
The 8th patch makes the code for handling the raid6 recovery more generic.
The last one implements the raid6 recovery for btrfs (i.e. handling the
disappearing up to two disks).
I tested the code in grub-emu, and it works both with all the disks,
and with some disks missing. I checked the crc32 calculated from grub and
from linux and these matched. Finally I checked if the support for md raid6
still works properly, and it does (with all drives and with up to 2 drives
missing)
Comments are welcome.
In general whole patch series LGTM. +/- some nit picks including changes
for patch #7. If you are OK with them and there are no objections then
I will apply the patches in a week or so.
Awesome! I'll look for the update and send an rebased version of the zstd
patch set when it is out.

-Nick
Daniel Kiper
2018-10-31 12:06:23 UTC
Permalink
Post by Nick Terrell
Post by Daniel Kiper
Hi All,
the aim of this patches set is to provide support for a BTRFS raid5/6
filesystem in GRUB.
The first patch, implements the basic support for raid5/6. I.e this works when
all the disks are present.
The next 5 patches, are preparatory ones.
The 7th patch implements the raid5 recovery for btrfs (i.e. handling the
disappearing of 1 disk).
The 8th patch makes the code for handling the raid6 recovery more generic.
The last one implements the raid6 recovery for btrfs (i.e. handling the
disappearing up to two disks).
I tested the code in grub-emu, and it works both with all the disks,
and with some disks missing. I checked the crc32 calculated from grub and
from linux and these matched. Finally I checked if the support for md raid6
still works properly, and it does (with all drives and with up to 2 drives
missing)
Comments are welcome.
In general whole patch series LGTM. +/- some nit picks including changes
for patch #7. If you are OK with them and there are no objections then
I will apply the patches in a week or so.
Awesome! I'll look for the update and send an rebased version of the zstd
patch set when it is out.
v11 pushed.

Goffredo, thank you for doing the work.

Nick, you can go ahead and rebase yours patchset.

Daniel
Goffredo Baroncelli
2018-10-31 18:48:08 UTC
Permalink
On 31/10/2018 13.06, Daniel Kiper wrote:
[...]
Post by Daniel Kiper
v11 pushed.
Goffredo, thank you for doing the work.
Great ! Many thanks for your support !!
Post by Daniel Kiper
Nick, you can go ahead and rebase yours patchset.
Daniel
BR
G.Baroncelli
--
gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
Key fingerprint BBF5 1610 0B64 DAC6 5F7D 17B2 0EDA 9B37 8B82 E0B5
David Sterba
2018-10-31 18:58:35 UTC
Permalink
Post by Goffredo Baroncelli
[...]
Post by Daniel Kiper
v11 pushed.
Goffredo, thank you for doing the work.
Great ! Many thanks for your support !!
Thank you very much for the work! I've updated wiki with the good news.
Daniel Kiper
2018-11-09 12:39:34 UTC
Permalink
Post by Goffredo Baroncelli
[...]
Post by Daniel Kiper
v11 pushed.
Goffredo, thank you for doing the work.
Great ! Many thanks for your support !!
You are welcome!

Daniel

Goffredo Baroncelli
2018-10-22 17:29:35 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

A portion of the logging code is moved outside of the inner for(;;) loop. The
part that is left inside is the one which depends on the inner for(;;) index.

This is a preparatory patch. The next one will refactor the code inside
the for(;;) into another function.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 81f3bc120..dde0edd03 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -870,6 +870,18 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

for (j = 0; j < 2; j++)
{
+ grub_dprintf ("btrfs", "chunk 0x%" PRIxGRUB_UINT64_T
+ "+0x%" PRIxGRUB_UINT64_T
+ " (%d stripes (%d substripes) of %"
+ PRIxGRUB_UINT64_T ")\n",
+ grub_le_to_cpu64 (key->offset),
+ grub_le_to_cpu64 (chunk->size),
+ grub_le_to_cpu16 (chunk->nstripes),
+ grub_le_to_cpu16 (chunk->nsubstripes),
+ grub_le_to_cpu64 (chunk->stripe_length));
+ grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
+ addr);
+
for (i = 0; i < redundancy; i++)
{
struct grub_btrfs_chunk_stripe *stripe;
@@ -882,20 +894,11 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;

- grub_dprintf ("btrfs", "chunk 0x%" PRIxGRUB_UINT64_T
- "+0x%" PRIxGRUB_UINT64_T
- " (%d stripes (%d substripes) of %"
- PRIxGRUB_UINT64_T ") stripe %" PRIxGRUB_UINT64_T
+ grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
" maps to 0x%" PRIxGRUB_UINT64_T "\n",
- grub_le_to_cpu64 (key->offset),
- grub_le_to_cpu64 (chunk->size),
- grub_le_to_cpu16 (chunk->nstripes),
- grub_le_to_cpu16 (chunk->nsubstripes),
- grub_le_to_cpu64 (chunk->stripe_length),
stripen, stripe->offset);
grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T
- " for laddr 0x%" PRIxGRUB_UINT64_T "\n", paddr,
- addr);
+ "\n", paddr);

dev = find_device (data, stripe->device_id);
if (!dev)
--
2.19.1
Goffredo Baroncelli
2018-10-22 17:29:32 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

This helper is used in a few places to help debugging. As a
conservative approach, the error is only logged.
This does not impact the error handling.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 9122169aa..0cbf3551a 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -77,7 +77,8 @@ struct btrfs_header
{
grub_btrfs_checksum_t checksum;
grub_btrfs_uuid_t uuid;
- grub_uint8_t dummy[0x30];
+ grub_uint64_t bytenr;
+ grub_uint8_t dummy[0x28];
grub_uint32_t nitems;
grub_uint8_t level;
} GRUB_PACKED;
@@ -286,6 +287,25 @@ free_iterator (struct grub_btrfs_leaf_descriptor *desc)
grub_free (desc->data);
}

+static grub_err_t
+check_btrfs_header (struct grub_btrfs_data *data, struct btrfs_header *header,
+ grub_disk_addr_t addr)
+{
+ if (grub_le_to_cpu64 (header->bytenr) != addr)
+ {
+ grub_dprintf ("btrfs", "btrfs_header.bytenr is not equal node addr\n");
+ return grub_error (GRUB_ERR_BAD_FS,
+ "header bytenr is not equal node addr");
+ }
+ if (grub_memcmp (data->sblock.uuid, header->uuid, sizeof(grub_btrfs_uuid_t)))
+ {
+ grub_dprintf ("btrfs", "btrfs_header.uuid doesn't match sblock uuid\n");
+ return grub_error (GRUB_ERR_BAD_FS,
+ "header uuid doesn't match sblock uuid");
+ }
+ return GRUB_ERR_NONE;
+}
+
static grub_err_t
save_ref (struct grub_btrfs_leaf_descriptor *desc,
grub_disk_addr_t addr, unsigned i, unsigned m, int l)
@@ -341,6 +361,7 @@ next (struct grub_btrfs_data *data,

err = grub_btrfs_read_logical (data, grub_le_to_cpu64 (node.addr),
&head, sizeof (head), 0);
+ check_btrfs_header (data, &head, grub_le_to_cpu64 (node.addr));
if (err)
return -err;

@@ -402,6 +423,7 @@ lower_bound (struct grub_btrfs_data *data,
/* FIXME: preread few nodes into buffer. */
err = grub_btrfs_read_logical (data, addr, &head, sizeof (head),
recursion_depth + 1);
+ check_btrfs_header (data, &head, addr);
if (err)
return err;
addr += sizeof (head);
--
2.19.1
Goffredo Baroncelli
2018-10-22 17:29:38 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

The original code which handles the recovery of a RAID 6 disk array
assumes that all reads are multiples of 1 << GRUB_DISK_SECTOR_BITS and it
assumes that all the I/O is done via the struct grub_diskfilter_segment.
This is not true for the btrfs code. In order to reuse the native
grub_raid6_recover() code, it is modified to not call
grub_diskfilter_read_node() directly, but to call a handler passed
as an argument.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/disk/raid6_recover.c | 52 ++++++++++++++++++++++------------
include/grub/diskfilter.h | 9 ++++++
2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/grub-core/disk/raid6_recover.c b/grub-core/disk/raid6_recover.c
index aa674f6ca..0cf691ddf 100644
--- a/grub-core/disk/raid6_recover.c
+++ b/grub-core/disk/raid6_recover.c
@@ -74,14 +74,26 @@ mod_255 (unsigned x)
}

static grub_err_t
-grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
- char *buf, grub_disk_addr_t sector, grub_size_t size)
+raid6_recover_read_node (void *data, int disknr,
+ grub_uint64_t sector,
+ void *buf, grub_size_t size)
+{
+ struct grub_diskfilter_segment *array = data;
+
+ return grub_diskfilter_read_node (&array->nodes[disknr],
+ (grub_disk_addr_t)sector,
+ size >> GRUB_DISK_SECTOR_BITS, buf);
+}
+
+grub_err_t
+grub_raid6_recover_gen (void *data, grub_uint64_t nstripes, int disknr, int p,
+ char *buf, grub_uint64_t sector, grub_size_t size,
+ int layout, raid_recover_read_t read_func)
{
int i, q, pos;
int bad1 = -1, bad2 = -1;
char *pbuf = 0, *qbuf = 0;

- size <<= GRUB_DISK_SECTOR_BITS;
pbuf = grub_zalloc (size);
if (!pbuf)
goto quit;
@@ -91,17 +103,17 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
goto quit;

q = p + 1;
- if (q == (int) array->node_count)
+ if (q == (int) nstripes)
q = 0;

pos = q + 1;
- if (pos == (int) array->node_count)
+ if (pos == (int) nstripes)
pos = 0;

- for (i = 0; i < (int) array->node_count - 2; i++)
+ for (i = 0; i < (int) nstripes - 2; i++)
{
int c;
- if (array->layout & GRUB_RAID_LAYOUT_MUL_FROM_POS)
+ if (layout & GRUB_RAID_LAYOUT_MUL_FROM_POS)
c = pos;
else
c = i;
@@ -109,8 +121,7 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
bad1 = c;
else
{
- if (! grub_diskfilter_read_node (&array->nodes[pos], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (!read_func(data, pos, sector, buf, size))
{
grub_crypto_xor (pbuf, pbuf, buf, size);
grub_raid_block_mulx (c, buf, size);
@@ -128,7 +139,7 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
}

pos++;
- if (pos == (int) array->node_count)
+ if (pos == (int) nstripes)
pos = 0;
}

@@ -139,16 +150,14 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
if (bad2 < 0)
{
/* One bad device */
- if ((! grub_diskfilter_read_node (&array->nodes[p], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf)))
+ if (!read_func(data, p, sector, buf, size))
{
grub_crypto_xor (buf, buf, pbuf, size);
goto quit;
}

grub_errno = GRUB_ERR_NONE;
- if (grub_diskfilter_read_node (&array->nodes[q], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, q, sector, buf, size))
goto quit;

grub_crypto_xor (buf, buf, qbuf, size);
@@ -160,14 +169,12 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
/* Two bad devices */
unsigned c;

- if (grub_diskfilter_read_node (&array->nodes[p], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, p, sector, buf, size))
goto quit;

grub_crypto_xor (pbuf, pbuf, buf, size);

- if (grub_diskfilter_read_node (&array->nodes[q], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, q, sector, buf, size))
goto quit;

grub_crypto_xor (qbuf, qbuf, buf, size);
@@ -190,6 +197,15 @@ quit:
return grub_errno;
}

+static grub_err_t
+grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
+ char *buf, grub_disk_addr_t sector, grub_size_t size)
+{
+ return grub_raid6_recover_gen (array, array->node_count, disknr, p, buf,
+ sector, size << GRUB_DISK_SECTOR_BITS,
+ array->layout, raid6_recover_read_node);
+}
+
GRUB_MOD_INIT(raid6rec)
{
grub_raid6_init_table ();
diff --git a/include/grub/diskfilter.h b/include/grub/diskfilter.h
index d89273c1b..8deb1a8c3 100644
--- a/include/grub/diskfilter.h
+++ b/include/grub/diskfilter.h
@@ -189,6 +189,15 @@ typedef grub_err_t (*grub_raid6_recover_func_t) (struct grub_diskfilter_segment
extern grub_raid5_recover_func_t grub_raid5_recover_func;
extern grub_raid6_recover_func_t grub_raid6_recover_func;

+typedef grub_err_t (* raid_recover_read_t)(void *data, int disk_nr,
+ grub_uint64_t addr, void *dest,
+ grub_size_t size);
+
+extern grub_err_t
+grub_raid6_recover_gen (void *data, grub_uint64_t nstripes, int disknr, int p,
+ char *buf, grub_uint64_t sector, grub_size_t size,
+ int layout, raid_recover_read_t read_func);
+
grub_err_t grub_diskfilter_vg_register (struct grub_diskfilter_vg *vg);

grub_err_t
--
2.19.1
Goffredo Baroncelli
2018-10-22 17:29:33 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

The caller knows better whether this error is fatal, i.e. whether another
disk is available.

This is a preparatory patch.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 0cbf3551a..6b6e91cd1 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -603,12 +603,7 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
if (do_rescan)
grub_device_iterate (find_device_iter, &ctx);
if (!ctx.dev_found)
- {
- grub_error (GRUB_ERR_BAD_FS,
- N_("couldn't find a necessary member device "
- "of multi-device filesystem"));
- return NULL;
- }
+ return NULL;
data->n_devices_attached++;
if (data->n_devices_attached > data->n_devices_allocated)
{
@@ -905,6 +900,9 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
dev = find_device (data, stripe->device_id, j);
if (!dev)
{
+ grub_dprintf ("btrfs",
+ "couldn't find a necessary member device "
+ "of multi-device filesystem\n");
err = grub_errno;
grub_errno = GRUB_ERR_NONE;
continue;
--
2.19.1
Goffredo Baroncelli
2018-10-22 17:29:34 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Currently, a read from a missing device triggers a rescan. However, it is
never recorded that the device is missing. So, each read of a missing device
triggers a rescan again and again. This behavior causes a lot of unneeded
rescans, leading to huge slowdowns.

This patch fixes the above-mentioned issue. Information about missing devices
is stored in the data->devices_attached[] array as a NULL value in the dev
member. A rescan is triggered only if no information is found for a given
device. This means that only the first read of a given device triggers a rescan.

The patch drops the premature return. This way data->devices_attached[] is
filled even when a given device is missing.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Signed-off-by: Daniel Kiper <***@oracle.com>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 6b6e91cd1..81f3bc120 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -588,7 +588,7 @@ find_device_iter (const char *name, void *data)
}

static grub_device_t
-find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
+find_device (struct grub_btrfs_data *data, grub_uint64_t id)
{
struct find_device_ctx ctx = {
.data = data,
@@ -600,10 +600,9 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
for (i = 0; i < data->n_devices_attached; i++)
if (id == data->devices_attached[i].id)
return data->devices_attached[i].dev;
- if (do_rescan)
- grub_device_iterate (find_device_iter, &ctx);
- if (!ctx.dev_found)
- return NULL;
+
+ grub_device_iterate (find_device_iter, &ctx);
+
data->n_devices_attached++;
if (data->n_devices_attached > data->n_devices_allocated)
{
@@ -615,7 +614,8 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
* sizeof (data->devices_attached[0]));
if (!data->devices_attached)
{
- grub_device_close (ctx.dev_found);
+ if (ctx.dev_found)
+ grub_device_close (ctx.dev_found);
data->devices_attached = tmp;
return NULL;
}
@@ -897,7 +897,7 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
" for laddr 0x%" PRIxGRUB_UINT64_T "\n", paddr,
addr);

- dev = find_device (data, stripe->device_id, j);
+ dev = find_device (data, stripe->device_id);
if (!dev)
{
grub_dprintf ("btrfs",
@@ -974,7 +974,8 @@ grub_btrfs_unmount (struct grub_btrfs_data *data)
unsigned i;
/* The device 0 is closed one layer upper. */
for (i = 1; i < data->n_devices_attached; i++)
- grub_device_close (data->devices_attached[i].dev);
+ if (data->devices_attached[i].dev)
+ grub_device_close (data->devices_attached[i].dev);
grub_free (data->devices_attached);
grub_free (data->extent);
grub_free (data);
--
2.19.1
Goffredo Baroncelli
2018-10-22 17:29:36 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Move the code in charge of reading the data from disk into a separate
function. This helps to separate the error handling logic (which depends on
the different RAID profiles) from the read-from-disk logic.
Refactoring this code also improves general readability.

This is a preparatory patch that eases the addition of the RAID 5/6 recovery
code.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 75 ++++++++++++++++++++++++++------------------
1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index dde0edd03..ea97f0502 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -625,6 +625,46 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id)
return ctx.dev_found;
}

+static grub_err_t
+btrfs_read_from_chunk (struct grub_btrfs_data *data,
+ struct grub_btrfs_chunk_item *chunk,
+ grub_uint64_t stripen, grub_uint64_t stripe_offset,
+ int redundancy, grub_uint64_t csize,
+ void *buf)
+{
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err;
+
+ stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
+ /* Right now the redundancy handling is easy.
+ With RAID5-like it will be more difficult. */
+ stripe += stripen + redundancy;
+
+ paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+
+ grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
+ " maps to 0x%" PRIxGRUB_UINT64_T
+ ". Reading paddr 0x%" PRIxGRUB_UINT64_T "\n",
+ stripen, stripe->offset, paddr);
+
+ dev = find_device (data, stripe->device_id);
+ if (!dev)
+ {
+ grub_dprintf ("btrfs",
+ "couldn't find a necessary member device "
+ "of multi-device filesystem\n");
+ grub_errno = GRUB_ERR_NONE;
+ return GRUB_ERR_READ_ERROR;
+ }
+
+ err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+ paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+ csize, buf);
+ return err;
+}
+
static grub_err_t
grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
void *buf, grub_size_t size, int recursion_depth)
@@ -638,7 +678,6 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_err_t err = 0;
struct grub_btrfs_key key_out;
int challoc = 0;
- grub_device_t dev;
struct grub_btrfs_key key_in;
grub_size_t chsize;
grub_disk_addr_t chaddr;
@@ -884,36 +923,10 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

for (i = 0; i < redundancy; i++)
{
- struct grub_btrfs_chunk_stripe *stripe;
- grub_disk_addr_t paddr;
-
- stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
- /* Right now the redundancy handling is easy.
- With RAID5-like it will be more difficult. */
- stripe += stripen + i;
-
- paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
-
- grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
- " maps to 0x%" PRIxGRUB_UINT64_T "\n",
- stripen, stripe->offset);
- grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T
- "\n", paddr);
-
- dev = find_device (data, stripe->device_id);
- if (!dev)
- {
- grub_dprintf ("btrfs",
- "couldn't find a necessary member device "
- "of multi-device filesystem\n");
- err = grub_errno;
- grub_errno = GRUB_ERR_NONE;
- continue;
- }
-
- err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
- paddr & (GRUB_DISK_SECTOR_SIZE - 1),
- csize, buf);
+ err = btrfs_read_from_chunk (data, chunk, stripen,
+ stripe_offset,
+ i, /* redundancy */
+ csize, buf);
if (!err)
break;
grub_errno = GRUB_ERR_NONE;
--
2.19.1
Goffredo Baroncelli
2018-10-22 17:29:39 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Add RAID 6 recovery, so that a RAID 6 filesystem can be used even if some
disks (up to two) are missing. This code uses the md RAID 6 code already
present in GRUB.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 60 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 965888da4..7c316fd33 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -30,6 +30,7 @@
#include <grub/i18n.h>
#include <grub/btrfs.h>
#include <grub/crypto.h>
+#include <grub/diskfilter.h>

GRUB_MOD_LICENSE ("GPLv3+");

@@ -701,11 +702,36 @@ rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
}
}

+static grub_err_t
+raid6_recover_read_buffer (void *data, int disk_nr,
+ grub_uint64_t addr __attribute__ ((unused)),
+ void *dest, grub_size_t size)
+{
+ struct raid56_buffer *buffers = data;
+
+ if (!buffers[disk_nr].data_is_valid)
+ return grub_errno = GRUB_ERR_READ_ERROR;
+
+ grub_memcpy(dest, buffers[disk_nr].buf, size);
+
+ return grub_errno = GRUB_ERR_NONE;
+}
+
+static void
+rebuild_raid6 (struct raid56_buffer *buffers, grub_uint64_t nstripes,
+ grub_uint64_t csize, grub_uint64_t parities_pos, void *dest,
+ grub_uint64_t stripen)
+
+{
+ grub_raid6_recover_gen (buffers, nstripes, stripen, parities_pos,
+ dest, 0, csize, 0, raid6_recover_read_buffer);
+}
+
static grub_err_t
raid56_read_retry (struct grub_btrfs_data *data,
struct grub_btrfs_chunk_item *chunk,
- grub_uint64_t stripe_offset,
- grub_uint64_t csize, void *buf)
+ grub_uint64_t stripe_offset, grub_uint64_t stripen,
+ grub_uint64_t csize, void *buf, grub_uint64_t parities_pos)
{
struct raid56_buffer *buffers;
grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
@@ -776,6 +802,15 @@ raid56_read_retry (struct grub_btrfs_data *data,
ret = GRUB_ERR_READ_ERROR;
goto cleanup;
}
+ else if (failed_devices > 2 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID6))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for RAID 6: total %" PRIuGRUB_UINT64_T
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
else
grub_dprintf ("btrfs",
"enough disks for RAID 5: total %"
@@ -786,7 +821,7 @@ raid56_read_retry (struct grub_btrfs_data *data,
if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
rebuild_raid5 (buf, buffers, nstripes, csize);
else
- grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+ rebuild_raid6 (buffers, nstripes, csize, parities_pos, buf, stripen);

ret = GRUB_ERR_NONE;
cleanup:
@@ -876,9 +911,11 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
unsigned redundancy = 1;
unsigned i, j;
int is_raid56;
+ grub_uint64_t parities_pos = 0;

- is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
- GRUB_BTRFS_CHUNK_TYPE_RAID5);
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ (GRUB_BTRFS_CHUNK_TYPE_RAID5 |
+ GRUB_BTRFS_CHUNK_TYPE_RAID6));

if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -1027,6 +1064,17 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
*/
grub_divmod64 (high + stripen, nstripes, &stripen);

+ /*
+ * parities_pos is equal to "(high - nparities) % nstripes"
+ * (see the diagram above).
+ * However "high - nparities" can be negative, eg. when high
+ * == 0 leading to an incorrect computation.
+ * "high + nstripes - nparities" is always positive and in
+ * modulo nstripes is equal to "(high - nparities) % nstripes"
+ */
+ grub_divmod64 (high + nstripes - nparities, nstripes,
+ &parities_pos);
+
stripe_offset = low + chunk_stripe_length * high;
csize = chunk_stripe_length - low;

@@ -1067,7 +1115,7 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_errno = GRUB_ERR_NONE;
if (err)
err = raid56_read_retry (data, chunk, stripe_offset,
- csize, buf);
+ stripen, csize, buf, parities_pos);
}
else
for (i = 0; i < redundancy; i++)
--
2.19.1
Goffredo Baroncelli
2018-10-22 17:29:37 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Add support for recovery of the RAID 5 btrfs profile. In addition, some code
is added as preparatory work for the RAID 6 recovery code.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 159 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 154 insertions(+), 5 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index ea97f0502..965888da4 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -29,6 +29,7 @@
#include <minilzo.h>
#include <grub/i18n.h>
#include <grub/btrfs.h>
+#include <grub/crypto.h>

GRUB_MOD_LICENSE ("GPLv3+");

@@ -665,6 +666,138 @@ btrfs_read_from_chunk (struct grub_btrfs_data *data,
return err;
}

+struct raid56_buffer {
+ void *buf;
+ int data_is_valid;
+};
+
+static void
+rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
+ grub_uint64_t nstripes, grub_uint64_t csize)
+{
+ grub_uint64_t i;
+ int first;
+
+ for(i = 0; buffers[i].data_is_valid && i < nstripes; i++);
+
+ if (i == nstripes)
+ {
+ grub_dprintf ("btrfs", "called rebuild_raid5(), but all disks are OK\n");
+ return;
+ }
+
+ grub_dprintf ("btrfs", "rebuilding RAID 5 stripe #%" PRIuGRUB_UINT64_T "\n", i);
+
+ for (i = 0, first = 1; i < nstripes; i++)
+ {
+ if (!buffers[i].data_is_valid)
+ continue;
+
+ if (first) {
+ grub_memcpy(dest, buffers[i].buf, csize);
+ first = 0;
+ } else
+ grub_crypto_xor (dest, dest, buffers[i].buf, csize);
+ }
+}
+
+static grub_err_t
+raid56_read_retry (struct grub_btrfs_data *data,
+ struct grub_btrfs_chunk_item *chunk,
+ grub_uint64_t stripe_offset,
+ grub_uint64_t csize, void *buf)
+{
+ struct raid56_buffer *buffers;
+ grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
+ grub_uint64_t chunk_type = grub_le_to_cpu64 (chunk->type);
+ grub_err_t ret = GRUB_ERR_OUT_OF_MEMORY;
+ grub_uint64_t i, failed_devices;
+
+ buffers = grub_zalloc (sizeof(*buffers) * nstripes);
+ if (!buffers)
+ goto cleanup;
+
+ for (i = 0; i < nstripes; i++)
+ {
+ buffers[i].buf = grub_zalloc (csize);
+ if (!buffers[i].buf)
+ goto cleanup;
+ }
+
+ for (failed_devices = 0, i = 0; i < nstripes; i++)
+ {
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err;
+
+ /* The struct grub_btrfs_chunk_stripe array lives behind struct
+ grub_btrfs_chunk_item. */
+ stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1) + i;
+
+ paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+ grub_dprintf ("btrfs", "reading paddr %" PRIxGRUB_UINT64_T
+ " from stripe ID %" PRIxGRUB_UINT64_T "\n", paddr,
+ stripe->device_id);
+
+ dev = find_device (data, stripe->device_id);
+ if (!dev)
+ {
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " FAILED (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ failed_devices++;
+ continue;
+ }
+
+ err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+ paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+ csize, buffers[i].buf);
+ if (err == GRUB_ERR_NONE)
+ {
+ buffers[i].data_is_valid = 1;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " OK (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ }
+ else
+ {
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T
+ " READ FAILED (dev ID %" PRIxGRUB_UINT64_T ")\n", i,
+ stripe->device_id);
+ failed_devices++;
+ }
+ }
+
+ if (failed_devices > 1 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for RAID 5: total %" PRIuGRUB_UINT64_T
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
+ else
+ grub_dprintf ("btrfs",
+ "enough disks for RAID 5: total %"
+ PRIuGRUB_UINT64_T ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+
+ /* We have enough disks. So, rebuild the data. */
+ if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ rebuild_raid5 (buf, buffers, nstripes, csize);
+ else
+ grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+
+ ret = GRUB_ERR_NONE;
+ cleanup:
+ if (buffers)
+ for (i = 0; i < nstripes; i++)
+ grub_free (buffers[i].buf);
+ grub_free (buffers);
+
+ return ret;
+}
+
static grub_err_t
grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
void *buf, grub_size_t size, int recursion_depth)
@@ -742,6 +875,10 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_uint16_t nstripes;
unsigned redundancy = 1;
unsigned i, j;
+ int is_raid56;
+
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ GRUB_BTRFS_CHUNK_TYPE_RAID5);

if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -921,17 +1058,29 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
addr);

- for (i = 0; i < redundancy; i++)
+ if (is_raid56)
{
err = btrfs_read_from_chunk (data, chunk, stripen,
stripe_offset,
- i, /* redundancy */
+ 0, /* no mirror */
csize, buf);
- if (!err)
- break;
grub_errno = GRUB_ERR_NONE;
+ if (err)
+ err = raid56_read_retry (data, chunk, stripe_offset,
+ csize, buf);
}
- if (i != redundancy)
+ else
+ for (i = 0; i < redundancy; i++)
+ {
+ err = btrfs_read_from_chunk (data, chunk, stripen,
+ stripe_offset,
+ i, /* redundancy */
+ csize, buf);
+ if (!err)
+ break;
+ grub_errno = GRUB_ERR_NONE;
+ }
+ if (!err)
break;
}
if (err)
--
2.19.1
Goffredo Baroncelli
2018-10-22 17:29:31 UTC
Permalink
From: Goffredo Baroncelli <***@inwind.it>

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
Signed-off-by: Daniel Kiper <***@oracle.com>
Reviewed-by: Daniel Kiper <***@oracle.com>
---
grub-core/fs/btrfs.c | 73 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index be195448d..9122169aa 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -119,6 +119,8 @@ struct grub_btrfs_chunk_item
#define GRUB_BTRFS_CHUNK_TYPE_RAID1 0x10
#define GRUB_BTRFS_CHUNK_TYPE_DUPLICATED 0x20
#define GRUB_BTRFS_CHUNK_TYPE_RAID10 0x40
+#define GRUB_BTRFS_CHUNK_TYPE_RAID5 0x80
+#define GRUB_BTRFS_CHUNK_TYPE_RAID6 0x100
grub_uint8_t dummy2[0xc];
grub_uint16_t nstripes;
grub_uint16_t nsubstripes;
@@ -764,6 +766,77 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
stripe_offset = low + chunk_stripe_length
* high;
csize = chunk_stripe_length - low;
+ break;
+ }
+ case GRUB_BTRFS_CHUNK_TYPE_RAID5:
+ case GRUB_BTRFS_CHUNK_TYPE_RAID6:
+ {
+ grub_uint64_t nparities, stripe_nr, high, low;
+
+ redundancy = 1; /* no redundancy for now */
+
+ if (grub_le_to_cpu64 (chunk->type) & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ {
+ grub_dprintf ("btrfs", "RAID5\n");
+ nparities = 1;
+ }
+ else
+ {
+ grub_dprintf ("btrfs", "RAID6\n");
+ nparities = 2;
+ }
+
+ /*
+ * RAID 6 layout consists of several stripes spread over
+ * the disks, e.g.:
+ *
+ * Disk_0 Disk_1 Disk_2 Disk_3
+ * A0 B0 P0 Q0
+ * Q1 A1 B1 P1
+ * P2 Q2 A2 B2
+ *
+ * Note: placement of the parities depend on row number.
+ *
+ * Pay attention that the btrfs terminology may differ from
+ * terminology used in other RAID implementations, e.g. LVM,
+ * dm or md. The main difference is that btrfs calls contiguous
+ * block of data on a given disk, e.g. A0, stripe instead of chunk.
+ *
+ * The variables listed below have following meaning:
+ * - stripe_nr is the stripe number excluding the parities
+ * (A0 = 0, B0 = 1, A1 = 2, B1 = 3, etc.),
+ * - high is the row number (0 for A0...Q0, 1 for Q1...P1, etc.),
+ * - stripen is the disk number in a row (0 for A0, Q1, P2,
+ * 1 for B0, A1, Q2, etc.),
+ * - off is the logical address to read,
+ * - chunk_stripe_length is the size of a stripe (typically 64 KiB),
+ * - nstripes is the number of disks in a row,
+ * - low is the offset of the data inside a stripe,
+ * - stripe_offset is the data offset in an array,
+ * - csize is the "potential" data to read; it will be reduced
+ * to size if the latter is smaller,
+ * - nparities is the number of parities (1 for RAID 5, 2 for
+ * RAID 6); used only in RAID 5/6 code.
+ */
+ stripe_nr = grub_divmod64 (off, chunk_stripe_length, &low);
+
+ /*
+ * stripen is computed without the parities
+ * (0 for A0, A1, A2, 1 for B0, B1, B2, etc.).
+ */
+ high = grub_divmod64 (stripe_nr, nstripes - nparities, &stripen);
+
+ /*
+ * The stripes are spread over the disks. Every each row their
+ * positions are shifted by 1 place. So, the real disks number
+ * change. Hence, we have to take into account current row number
+ * modulo nstripes (0 for A0, 1 for A1, 2 for A2, etc.).
+ */
+ grub_divmod64 (high + stripen, nstripes, &stripen);
+
+ stripe_offset = low + chunk_stripe_length * high;
+ csize = chunk_stripe_length - low;
+
break;
}
default:
--
2.19.1
Continue reading on narkive:
Loading...