From 2ca4358d0fc67f4511a30b94b339b5e291c6c944 Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Tue, 25 Jan 2011 23:01:30 +0100 Subject: [PATCH 15/84] LSBD: add initial support for mirroring This patch adds initial support for mirroring. --- drivers/block/lsbd.c | 290 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 275 insertions(+), 15 deletions(-) diff --git a/drivers/block/lsbd.c b/drivers/block/lsbd.c index abcca2b..126c45c 100644 --- a/drivers/block/lsbd.c +++ b/drivers/block/lsbd.c @@ -79,6 +79,7 @@ struct lsbd { unsigned long long epoch; unsigned int cur_block; unsigned int clean_block; + int mirrored; lsbd_lcache_t *lcache; unsigned int cur_lcache; @@ -104,7 +105,7 @@ struct lsbd_request { int move; }; -#if 0 +#if 1 #define lsbd_debug(c, fmt, ...) \ printk(KERN_DEBUG "lsbd%d: "fmt, (c)->id, ## __VA_ARGS__) #else @@ -245,6 +246,34 @@ struct buffer_head *lsbd_bread(struct lsbd *p, int block, int size) return bread(p->dev, block * p->sectors_per_block, size); } +static struct buffer_head *__lsbd_get_buffer(struct lsbd *p) +{ + struct buffer_head *bh; + + do { + bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO); + if (bh) + break; + + run_task_queue(&tq_disk); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + } while (1); + memset(bh, 0, sizeof(*bh)); + + bh->b_size = p->sector_size; + bh->b_dev = p->dev; + bh->b_state = (1 << BH_Req) | (1 << BH_Mapped); + + bh->b_page = NULL; + bh->b_data = NULL; + + init_waitqueue_head(&bh->b_wait); + bh->b_rdev = p->dev; + atomic_set(&bh->b_count, 1); + return bh; +} + static struct buffer_head *lsbd_get_buffer(struct lsbd *p) { struct buffer_head *bh; @@ -265,9 +294,9 @@ static struct buffer_head *lsbd_get_buffer(struct lsbd *p) bh->b_state = (1 << BH_Req) | (1 << BH_Mapped); /* - * easy way out, although it does waste some memory for < PAGE_SIZE - * blocks... 
if highmem bounce buffering can get away with it, - * so can we :-) + * easy way out, although it does waste some memory + * for < PAGE_SIZE blocks... if highmem bounce buffering + * can get away with it, so can we :-) */ do { bh->b_page = alloc_page(GFP_NOIO); @@ -280,12 +309,19 @@ static struct buffer_head *lsbd_get_buffer(struct lsbd *p) } while (1); bh->b_data = page_address(bh->b_page); + init_waitqueue_head(&bh->b_wait); bh->b_rdev = p->dev; atomic_set(&bh->b_count, 1); return bh; } +static void __lsbd_put_buffer(struct lsbd *p, struct buffer_head *bh) +{ + if (bh) + kmem_cache_free(bh_cachep, bh); +} + static void lsbd_put_buffer(struct lsbd *p, struct buffer_head *bh) { if (bh) { @@ -313,6 +349,26 @@ struct buffer_head *lsbd_getblk(struct lsbd *p, int block, unsigned int sector) return bh; } +/** + * __lsbd_getblk - allocate LSBD buffer head + * @p: LSBD device + * @block: block number + * @sector: sector offset in block + * + * This function returns NULL on failure; pointer to allocated buffer otherwise. + */ +struct buffer_head *__lsbd_getblk(struct lsbd *p, int block, + unsigned int sector) +{ + struct buffer_head *bh; + + bh = __lsbd_get_buffer(p); + bh->b_blocknr = block * p->sectors_per_block + sector; + bh->b_rsector = (block * p->sectors_per_block + sector) + * (p->sector_size >> 9); + return bh; +} + /* * Default synchronous end-of-IO handler.. Just mark it up-to-date and * unlock the buffer. This is what ll_rw_block uses too. @@ -325,14 +381,14 @@ void lsbd_end_buffer_io_sync(struct buffer_head *bh, int uptodate) } /** - * lsbs_sread - read data + * __lsbd_sread - read data * @p: LSBD device * @block: block number * @sector: sector offset in block * * This function NULL on failure; pointer to allocated buffer otherwise. 
*/ -struct buffer_head *lsbd_sread(struct lsbd *p, int block, unsigned int sector) +struct buffer_head *__lsbd_sread(struct lsbd *p, int block, unsigned int sector) { struct buffer_head *bh; @@ -357,6 +413,24 @@ struct buffer_head *lsbd_sread(struct lsbd *p, int block, unsigned int sector) } /** + * lsbd_sread - read data, falling back to the mirror block (block ^ 1) + * @p: LSBD device + * @block: block number + * @sector: sector offset in block + * + * This function returns NULL on failure; pointer to allocated buffer otherwise. + */ +struct buffer_head *lsbd_sread(struct lsbd *p, int block, unsigned int sector) +{ + struct buffer_head *bh; + + bh = __lsbd_sread(p, block, sector); + if (bh == NULL && p->mirrored) + bh = __lsbd_sread(p, block ^ 1, sector); + return bh; +} + +/** * lsbs_checksum - compute LSBD checksum * @buf: pointer to buffer * @count: number of bytes to checksum, must be a multiple of 4 @@ -782,6 +856,10 @@ static int lsbd_load_params(struct lsbd *p) p->sector_size = sector_size; } + p->mirrored = 0; + if (be32_to_cpu(b->flags) & LSBD_MIRRORED) + p->mirrored = 1; + if (blocks < p->blocks) { p->blocks = blocks; p->psectors = p->blocks * p->sectors_per_block; @@ -795,10 +873,11 @@ static int lsbd_load_params(struct lsbd *p) return -EINVAL; } - lsbd_info(p, "logical size %d KiB, physical %d KiB\n", + lsbd_info(p, "logical size %d KiB, physical %d KiB%s\n", p->lsectors * (p->sector_size >> 10), p->blocks * p->sectors_per_block - * (p->sector_size >> 10)); + * (p->sector_size >> 10), + p->mirrored ? 
"(mirrored)" : ""); lsbd_sizes[p->id] = p->lsectors * (p->sector_size >> 10); p->blocks_to_write = 0; @@ -943,12 +1022,14 @@ static int lsbd_write_blocks(struct lsbd *p, unsigned int num) */ static int lsbd_partition(struct lsbd *p, struct lsbd_part_info *part) { + int mirrored = 0; + lsbd_debug(p, "part: %d %d %d %d\n", part->num, part->flags, part->start, part->size); if (part->num > 16) return -EINVAL; - if (part->flags) + if (part->flags & ~LSBD_PART_MIRRORED) return -EINVAL; /* @@ -967,10 +1048,17 @@ static int lsbd_partition(struct lsbd *p, struct lsbd_part_info *part) if (sectors < 1024) sectors += 128; + if (part->flags & LSBD_PART_MIRRORED) { + mirrored = 1; + sectors /= 2; + } + /* don't allow more than 7/8 utilization */ if (part->size / 7 > sectors / 8) return -EINVAL; + p->mirrored = mirrored; + if (part->size > p->lsectors) { lsbd_lcache_t *l; lsbd_lcache_t *ol; @@ -1157,6 +1245,77 @@ static int lsbd_queue_bh(struct lsbd *p, struct buffer_head *bh, #define lsbd_partition(x) ((x) & ((1 << PART_BITS) - 1)) +static unsigned int rand_seed = 152L; + +/* + * simple random number generator from glibc 2.5 (TYPE_0) + */ +static unsigned int rand(void) +{ + rand_seed = (rand_seed * 1103515245 + 12345) & 0x7fffffff; + return rand_seed; +} + +int fault_gen(void) +{ + if (!(rand() >> 21)) + return 1; + return 0; +} + +/** + * lsbd_read_mirrored - read from mirrored LSBD device + * @p: LSBD device + * @bh: buffer head + * @sector: physical sector to read + */ +static int lsbd_read_mirrored(struct lsbd *p, struct buffer_head *bh, + unsigned int sector) +{ + struct buffer_head *rbh; + + if (fault_gen()) { + lsbd_debug(p, "simulated I/O error, sector %d\n", sector); + return -1; + } + + rbh = __lsbd_get_buffer(p); + rbh->b_data = bh->b_data; + rbh->b_blocknr = sector; + rbh->b_rsector = sector * (p->sector_size >> 9); + set_bit(BH_Sync, &rbh->b_state); + + lock_buffer(rbh); + rbh->b_end_io = lsbd_end_buffer_io_sync; + set_bit(BH_Req, &rbh->b_state); + 
set_bit(BH_Launder, &rbh->b_state); + get_bh(rbh); + generic_make_request(READ, rbh); + + wait_on_buffer(rbh); + if (buffer_uptodate(rbh)) + bh->b_end_io(bh, 1); + + brelse(rbh); + __lsbd_put_buffer(p, rbh); + return 0; +} + +/** + * lsbd_switch_mirror - locate mirror block of specified sector + * @p: LSBD device + * @lsector: logical sector + * @sector: current logical sector mapping + */ +static int lsbd_switch_mirror(struct lsbd *p, unsigned int lsector, + unsigned int sector) +{ + lsbd_debug(p, "switching mirror of %d from %d to %d\n", + lsector, sector, sector ^ p->sectors_per_block); + p->lcache[lsector] = sector ^ p->sectors_per_block; + return 0; +} + static int lsbd_make_request(request_queue_t *q, int rw, struct buffer_head *bh) { @@ -1203,14 +1362,44 @@ static int lsbd_make_request(request_queue_t *q, int rw, buffer_IO_error(bh); return 0; } - bh->b_rsector = (unsigned long) p->lcache[lsector] << 3; - lsbd_debug(p, "mapped to physical %ld\n", bh->b_rsector); - bh->b_rdev = p->dev; /* - * Let the main block layer submit the IO and resolve recursion: + * for non-mirrored devices, reads can be redirected + * to the lower-level device. */ - return 1; + if (!p->mirrored) { + bh->b_rsector = (unsigned long) p->lcache[lsector] << 3; + lsbd_debug(p, "mapped to physical %ld\n", + bh->b_rsector); + + bh->b_rdev = p->dev; + return 1; + } + /* + * reading from a mirrored device is trickier: if a single + * read fails, the second mirror can be used. 
+ */ + lsbd_read_mirrored(p, bh, sector); + if (buffer_uptodate(bh)) + return 0; + + /* try to switch this sector to mirror and retry */ + lsbd_switch_mirror(p, lsector, sector); + sector = p->lcache[lsector]; + if (sector >= p->psectors) { + lsbd_info(p, "invalid lcache entry: %d -> %d\n", + lsector, sector); + buffer_IO_error(bh); + return 0; + } + lsbd_read_mirrored(p, bh, sector); + + /* TODO: rewrite this sector */ + if (buffer_uptodate(bh)) + return 0; + + buffer_IO_error(bh); + return 0; } /* @@ -1259,11 +1448,18 @@ void lsbd_clean_block(struct lsbd *p) unsigned long sectors_max; unsigned long i; unsigned int ptab_offset; + int mirrored = 0; block++; if (block >= p->blocks) block = 0; + if (!(block & 1) && p->mirrored) { + if (block + 1 >= p->blocks) + block = 0; + mirrored = 1; + } + bh = lsbd_sread(p, block, 0); if (bh == NULL) goto out; @@ -1311,6 +1507,11 @@ void lsbd_clean_block(struct lsbd *p) lsbd_put_buffer(p, bh); out: + if (mirrored) { + block++; + if (block >= p->blocks) + block = 0; + } p->clean_block = block; } @@ -1322,6 +1523,7 @@ out: static int initialize_block(struct lsbd *p, struct lsbd_block *b) { unsigned int i; + uint32_t lsbd_flags; b->magic = cpu_to_be64(LSBD_BLOCK_MAGIC); b->version = cpu_to_be32(0); @@ -1337,6 +1539,11 @@ static int initialize_block(struct lsbd *p, struct lsbd_block *b) b->sector_size = cpu_to_be32(p->sector_size); b->sectors_per_block = cpu_to_be32(p->sectors_per_block); + lsbd_flags = 0; + if (p->mirrored) + lsbd_flags |= LSBD_MIRRORED; + b->flags = cpu_to_be32(lsbd_flags); + b->blocks = cpu_to_be32(p->blocks); b->lsectors = cpu_to_be32(p->lsectors); @@ -1411,18 +1618,28 @@ static struct lsbd_request *lsbd_request_dequeue(struct lsbd *p) static int lsbd_write_block(struct lsbd *p) { struct buffer_head **bh; + struct buffer_head **bh2; struct buffer_head **wbh; struct lsbd_block *b; struct lsbd_sect *sects; struct lsbd_request *r; unsigned int i; + int mirrored = 0; p->cur_block++; if (p->cur_block >= p->blocks) 
p->cur_block = 0; - lsbd_debug(p, "writting block %d\n", p->cur_block); + if (!(p->cur_block & 1) && p->mirrored) { + mirrored = 1; + if (p->cur_block + 1 >= p->blocks) + p->cur_block = 0; + } + + lsbd_debug(p, "writting block %d%s\n", p->cur_block, + mirrored ? "(mirrored)" : ""); + /* FIXME: don't change cur_block on error */ bh = kmalloc(p->sectors_per_block * sizeof(*bh), GFP_NOIO); if (bh == NULL) return -ENOMEM; @@ -1433,11 +1650,31 @@ static int lsbd_write_block(struct lsbd *p) return -ENOMEM; } + bh2 = NULL; + if (mirrored) { + bh2 = kmalloc(p->sectors_per_block * sizeof(*bh2), GFP_NOIO); + if (bh2 == NULL) { + kfree(bh); + kfree(wbh); + return -ENOMEM; + } + } + for (i = 0; i < p->sectors_per_block; i++) { bh[i] = lsbd_getblk(p, p->cur_block, i); BUG_ON(bh[i] == NULL); lock_buffer(bh[i]); } + if (mirrored) { + for (i = 0; i < p->sectors_per_block; i++) { + bh2[i] = __lsbd_getblk(p, p->cur_block + 1, i); + BUG_ON(bh2[i] == NULL); + lock_buffer(bh2[i]); + + bh2[i]->b_page = bh[i]->b_page; + bh2[i]->b_data = bh[i]->b_data; + } + } memset(bh[0]->b_data, 0, p->sector_size); b = (void *) bh[0]->b_data; @@ -1497,10 +1734,26 @@ static int lsbd_write_block(struct lsbd *p) get_bh(bh[i]); generic_make_request(WRITE, bh[i]); } + if (mirrored) { + for (i = 0; i < p->sectors_per_block; i++) { + mark_buffer_uptodate(bh2[i], 1); + bh2[i]->b_end_io = lsbd_end_buffer_io_sync; + set_bit(BH_Req, &bh2[i]->b_state); + set_bit(BH_Launder, &bh2[i]->b_state); + get_bh(bh2[i]); + generic_make_request(WRITE, bh2[i]); + } + } lsbd_debug(p, "waiting for write\n"); for (i = 0; i < p->sectors_per_block; i++) { wait_on_buffer(bh[i]); + + if (mirrored) { + wait_on_buffer(bh2[i]); + brelse(bh2[i]); + __lsbd_put_buffer(p, bh2[i]); + } brelse(bh[i]); lsbd_put_buffer(p, bh[i]); @@ -1510,6 +1763,13 @@ static int lsbd_write_block(struct lsbd *p) lsbd_debug(p, "done\n"); kfree(wbh); kfree(bh); + kfree(bh2); + + if (mirrored) { + p->cur_block++; + if (p->cur_block >= p->blocks) + p->cur_block = 
0; + } return 0; } -- 1.8.4.652.g0d6e0ce