From c960cded34bd1df78c4b3b496e0d0e5b0a6ac2dc Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Fri, 22 Jul 2011 22:39:26 +0200 Subject: [PATCH 48/84] lsbd: fix possible read/rewrite race The LSBD performs logical sector to physical sector mapping and just starts physical sector READ request. However the lsbd_thread can relocate sector between this mapping and read from the underlying device. This could lead to silent data corruption. This patch fixes this problem by checking mapping also after read from the underlying device and retrying read request when this mapping is changed. --- drivers/block/lsbd.c | 68 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/drivers/block/lsbd.c b/drivers/block/lsbd.c index 6e3176f..04dd929 100644 --- a/drivers/block/lsbd.c +++ b/drivers/block/lsbd.c @@ -1415,6 +1415,7 @@ static int lsbd_read_mirrored(struct lsbd *p, struct buffer_head *bh, unsigned int sector) { struct buffer_head *rbh; + int ret = 0; rbh = __lsbd_get_buffer(p); rbh->b_data = bh->b_data; @@ -1430,12 +1431,12 @@ static int lsbd_read_mirrored(struct lsbd *p, struct buffer_head *bh, generic_make_request(READ, rbh); wait_on_buffer(rbh); - if (buffer_uptodate(rbh)) - bh->b_end_io(bh, 1); + if (!buffer_uptodate(rbh)) + ret = -EIO; brelse(rbh); __lsbd_put_buffer(p, rbh); - return 0; + return ret; } /** @@ -1484,7 +1485,8 @@ static int lsbd_make_read(struct lsbd *p, int rw, struct buffer_head *bh) { unsigned int partition = lsbd_minor_to_partition(MINOR(bh->b_rdev)); unsigned int lsector = LSBD_SECT_INVALID; - unsigned int sector; + unsigned int sector, sector2; + int ret; lsector = bh->b_rsector >> 3; @@ -1498,6 +1500,7 @@ static int lsbd_make_read(struct lsbd *p, int rw, struct buffer_head *bh) sector = p->lcache[lsector]; read_unlock(&p->lcache_lock); lsbd_debug(p, "read %d, mapped to %d\n", lsector, sector); +retry: if (sector == LSBD_SECT_INVALID) { buffer_IO_error(bh); return 0; @@ -1514,26 +1517,23 @@ static int lsbd_make_read(struct lsbd *p, int rw, struct buffer_head *bh) } /* - * reading non-mirrored device is quite simple - requests are - * just remapped to lower level device + * read from mirrored device is more tricky, if single + * read fails second mirror can be used. */ - if (!p->mirrored) { - bh->b_rsector = (unsigned long) sector << 3; - lsbd_debug(p, "mapped to physical %ld\n", bh->b_rsector); + ret = lsbd_read_mirrored(p, bh, sector); + if (likely(!ret)) + goto read_ok; - bh->b_rdev = p->dev; - return 1; - } + p->read_errors++; /* - * read from mirrored device is more tricky, if single - * read fails second mirror can be used. + * For non-mirrored devices we must just give up and return I/O + * error. */ - lsbd_read_mirrored(p, bh, sector); - if (buffer_uptodate(bh)) + if (!p->mirrored) { + buffer_IO_error(bh); return 0; - - p->read_errors++; + } /* try to switch this sector to mirror and retry */ write_lock(&p->lcache_lock); @@ -1547,17 +1547,41 @@ static int lsbd_make_read(struct lsbd *p, int rw, struct buffer_head *bh) buffer_IO_error(bh); return 0; } - lsbd_read_mirrored(p, bh, sector); - + ret = lsbd_read_mirrored(p, bh, sector); /* TODO: rewrite this sector */ - if (buffer_uptodate(bh)) - return 0; + if (!ret) + goto read_ok; lsbd_info(p, "uncorrectable error sector %d (physical %d and %d)\n", lsector, sector ^ p->sectors_per_block, sector); p->uncorrectable++; buffer_IO_error(bh); return 0; + +read_ok: + /* + * Currently we allow for race with sector reallocation and + * we need to detect and correct it here. + * + * We performed logical -> physical sector mapping and issued + * READ request. However the lsbd_thread could relocate + * this logical sector and rewrite physical sector. + * + * Currently we check if mapping before and after READ was the same. + * In extreme case, when this logical sector was relocated twice, + * this can be not sufficient. + */ + read_lock(&p->lcache_lock); + sector2 = p->lcache[lsector]; + read_unlock(&p->lcache_lock); + + if (unlikely(sector != sector2)) { + sector = sector2; + goto retry; + } + + bh->b_end_io(bh, 1); + return 0; } /** -- 1.8.4.652.g0d6e0ce