diff options
author | Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> | 2009-04-06 19:01:35 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-07 08:31:15 -0700 |
commit | 8a9d2191e9f43bbcd256a9a6871bd73434c83f2f (patch) | |
tree | e2c8967703c3f2bf15dcfbe78eafdfe0c328c3c4 /fs/nilfs2 | |
parent | d25006523d0b9e49fd097b2e974e7c8c05bd7f54 (diff) |
nilfs2: operations for the_nilfs core object
This adds functions on the_nilfs object, which keeps shared resources and
states among a read/write mount and snapshots mounts going individually.
the_nilfs is allocated per block device; it is created when user first
mount a snapshot or a read/write mount on the device, then it is reused
for successive mounts. It will be freed when all mount instances on the
device are detached.
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/nilfs2')
-rw-r--r-- | fs/nilfs2/the_nilfs.c | 524 |
1 files changed, 524 insertions, 0 deletions
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c new file mode 100644 index 000000000000..852e0bf3a3c5 --- /dev/null +++ b/fs/nilfs2/the_nilfs.c @@ -0,0 +1,524 @@ +/* + * the_nilfs.c - the_nilfs shared structure. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi <ryusuke@osrg.net> + * + */ + +#include <linux/buffer_head.h> +#include <linux/slab.h> +#include <linux/blkdev.h> +#include <linux/backing-dev.h> +#include "nilfs.h" +#include "segment.h" +#include "alloc.h" +#include "cpfile.h" +#include "sufile.h" +#include "dat.h" +#include "seglist.h" +#include "segbuf.h" + +void nilfs_set_last_segment(struct the_nilfs *nilfs, + sector_t start_blocknr, u64 seq, __u64 cno) +{ + spin_lock(&nilfs->ns_last_segment_lock); + nilfs->ns_last_pseg = start_blocknr; + nilfs->ns_last_seq = seq; + nilfs->ns_last_cno = cno; + spin_unlock(&nilfs->ns_last_segment_lock); +} + +/** + * alloc_nilfs - allocate the_nilfs structure + * @bdev: block device to which the_nilfs is related + * + * alloc_nilfs() allocates memory for the_nilfs and + * initializes its reference count and locks. + * + * Return Value: On success, pointer to the_nilfs is returned. + * On error, NULL is returned. + */ +struct the_nilfs *alloc_nilfs(struct block_device *bdev) +{ + struct the_nilfs *nilfs; + + nilfs = kzalloc(sizeof(*nilfs), GFP_KERNEL); + if (!nilfs) + return NULL; + + nilfs->ns_bdev = bdev; + atomic_set(&nilfs->ns_count, 1); + atomic_set(&nilfs->ns_writer_refcount, -1); + atomic_set(&nilfs->ns_ndirtyblks, 0); + init_rwsem(&nilfs->ns_sem); + mutex_init(&nilfs->ns_writer_mutex); + INIT_LIST_HEAD(&nilfs->ns_supers); + spin_lock_init(&nilfs->ns_last_segment_lock); + nilfs->ns_gc_inodes_h = NULL; + INIT_LIST_HEAD(&nilfs->ns_used_segments); + init_rwsem(&nilfs->ns_segctor_sem); + init_waitqueue_head(&nilfs->ns_cleanerd_wq); + + return nilfs; +} + +/** + * put_nilfs - release a reference to the_nilfs + * @nilfs: the_nilfs structure to be released + * + * put_nilfs() decrements a reference counter of the_nilfs. + * If the reference count reaches zero, the_nilfs is freed. + */ +void put_nilfs(struct the_nilfs *nilfs) +{ + if (!atomic_dec_and_test(&nilfs->ns_count)) + return; + /* + * Increment of ns_count never occur below because the caller + * of get_nilfs() holds at least one reference to the_nilfs. + * Thus its exclusion control is not required here. + */ + might_sleep(); + if (nilfs_loaded(nilfs)) { + nilfs_dispose_used_segments(nilfs); + nilfs_mdt_clear(nilfs->ns_sufile); + nilfs_mdt_destroy(nilfs->ns_sufile); + nilfs_mdt_clear(nilfs->ns_cpfile); + nilfs_mdt_destroy(nilfs->ns_cpfile); + nilfs_mdt_clear(nilfs->ns_dat); + nilfs_mdt_destroy(nilfs->ns_dat); + /* XXX: how and when to clear nilfs->ns_gc_dat? */ + nilfs_mdt_destroy(nilfs->ns_gc_dat); + } + if (nilfs_init(nilfs)) { + nilfs_destroy_gccache(nilfs); + brelse(nilfs->ns_sbh); + } + kfree(nilfs); +} + +static int nilfs_load_super_root(struct the_nilfs *nilfs, + struct nilfs_sb_info *sbi, sector_t sr_block) +{ + struct buffer_head *bh_sr; + struct nilfs_super_root *raw_sr; + unsigned dat_entry_size, segment_usage_size, checkpoint_size; + unsigned inode_size; + int err; + + err = nilfs_read_super_root_block(sbi->s_super, sr_block, &bh_sr, 1); + if (unlikely(err)) + return err; + + down_read(&nilfs->ns_sem); + dat_entry_size = le16_to_cpu(nilfs->ns_sbp->s_dat_entry_size); + checkpoint_size = le16_to_cpu(nilfs->ns_sbp->s_checkpoint_size); + segment_usage_size = le16_to_cpu(nilfs->ns_sbp->s_segment_usage_size); + up_read(&nilfs->ns_sem); + + inode_size = nilfs->ns_inode_size; + + err = -ENOMEM; + nilfs->ns_dat = nilfs_mdt_new( + nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP); + if (unlikely(!nilfs->ns_dat)) + goto failed; + + nilfs->ns_gc_dat = nilfs_mdt_new( + nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP); + if (unlikely(!nilfs->ns_gc_dat)) + goto failed_dat; + + nilfs->ns_cpfile = nilfs_mdt_new( + nilfs, NULL, NILFS_CPFILE_INO, NILFS_CPFILE_GFP); + if (unlikely(!nilfs->ns_cpfile)) + goto failed_gc_dat; + + nilfs->ns_sufile = nilfs_mdt_new( + nilfs, NULL, NILFS_SUFILE_INO, NILFS_SUFILE_GFP); + if (unlikely(!nilfs->ns_sufile)) + goto failed_cpfile; + + err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size); + if (unlikely(err)) + goto failed_sufile; + + err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size); + if (unlikely(err)) + goto failed_sufile; + + nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); + nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, + sizeof(struct nilfs_cpfile_header)); + nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size, + sizeof(struct nilfs_sufile_header)); + + err = nilfs_mdt_read_inode_direct( + nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size)); + if (unlikely(err)) + goto failed_sufile; + + err = nilfs_mdt_read_inode_direct( + nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size)); + if (unlikely(err)) + goto failed_sufile; + + err = nilfs_mdt_read_inode_direct( + nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size)); + if (unlikely(err)) + goto failed_sufile; + + raw_sr = (struct nilfs_super_root *)bh_sr->b_data; + nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime); + + failed: + brelse(bh_sr); + return err; + + failed_sufile: + nilfs_mdt_destroy(nilfs->ns_sufile); + + failed_cpfile: + nilfs_mdt_destroy(nilfs->ns_cpfile); + + failed_gc_dat: + nilfs_mdt_destroy(nilfs->ns_gc_dat); + + failed_dat: + nilfs_mdt_destroy(nilfs->ns_dat); + goto failed; +} + +static void nilfs_init_recovery_info(struct nilfs_recovery_info *ri) +{ + memset(ri, 0, sizeof(*ri)); + INIT_LIST_HEAD(&ri->ri_used_segments); +} + +static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri) +{ + nilfs_dispose_segment_list(&ri->ri_used_segments); +} + +/** + * load_nilfs - load and recover the nilfs + * @nilfs: the_nilfs structure to be released + * @sbi: nilfs_sb_info used to recover past segment + * + * load_nilfs() searches and load the latest super root, + * attaches the last segment, and does recovery if needed. + * The caller must call this exclusively for simultaneous mounts. + */ +int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) +{ + struct nilfs_recovery_info ri; + unsigned int s_flags = sbi->s_super->s_flags; + int really_read_only = bdev_read_only(nilfs->ns_bdev); + unsigned valid_fs; + int err = 0; + + nilfs_init_recovery_info(&ri); + + down_write(&nilfs->ns_sem); + valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); + up_write(&nilfs->ns_sem); + + if (!valid_fs && (s_flags & MS_RDONLY)) { + printk(KERN_INFO "NILFS: INFO: recovery " + "required for readonly filesystem.\n"); + if (really_read_only) { + printk(KERN_ERR "NILFS: write access " + "unavailable, cannot proceed.\n"); + err = -EROFS; + goto failed; + } + printk(KERN_INFO "NILFS: write access will " + "be enabled during recovery.\n"); + sbi->s_super->s_flags &= ~MS_RDONLY; + } + + err = nilfs_search_super_root(nilfs, sbi, &ri); + if (unlikely(err)) { + printk(KERN_ERR "NILFS: error searching super root.\n"); + goto failed; + } + + err = nilfs_load_super_root(nilfs, sbi, ri.ri_super_root); + if (unlikely(err)) { + printk(KERN_ERR "NILFS: error loading super root.\n"); + goto failed; + } + + if (!valid_fs) { + err = nilfs_recover_logical_segments(nilfs, sbi, &ri); + if (unlikely(err)) { + nilfs_mdt_destroy(nilfs->ns_cpfile); + nilfs_mdt_destroy(nilfs->ns_sufile); + nilfs_mdt_destroy(nilfs->ns_dat); + goto failed; + } + if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) { + down_write(&nilfs->ns_sem); + nilfs_update_last_segment(sbi, 0); + up_write(&nilfs->ns_sem); + } + } + + set_nilfs_loaded(nilfs); + + failed: + nilfs_clear_recovery_info(&ri); + sbi->s_super->s_flags = s_flags; + return err; +} + +static unsigned long long nilfs_max_size(unsigned int blkbits) +{ + unsigned int max_bits; + unsigned long long res = MAX_LFS_FILESIZE; /* page cache limit */ + + max_bits = blkbits + NILFS_BMAP_KEY_BIT; /* bmap size limit */ + if (max_bits < 64) + res = min_t(unsigned long long, res, (1ULL << max_bits) - 1); + return res; +} + +static int +nilfs_store_disk_layout(struct the_nilfs *nilfs, struct super_block *sb, + struct nilfs_super_block *sbp) +{ + if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) { + printk(KERN_ERR "NILFS: revision mismatch " + "(superblock rev.=%d.%d, current rev.=%d.%d). " + "Please check the version of mkfs.nilfs.\n", + le32_to_cpu(sbp->s_rev_level), + le16_to_cpu(sbp->s_minor_rev_level), + NILFS_CURRENT_REV, NILFS_MINOR_REV); + return -EINVAL; + } + nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); + nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); + + nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); + if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { + printk(KERN_ERR "NILFS: too short segment. \n"); + return -EINVAL; + } + + nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); + nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments); + nilfs->ns_r_segments_percentage = + le32_to_cpu(sbp->s_r_segments_percentage); + nilfs->ns_nrsvsegs = + max_t(unsigned long, NILFS_MIN_NRSVSEGS, + DIV_ROUND_UP(nilfs->ns_nsegments * + nilfs->ns_r_segments_percentage, 100)); + nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); + return 0; +} + +/** + * init_nilfs - initialize a NILFS instance. + * @nilfs: the_nilfs structure + * @sbi: nilfs_sb_info + * @sb: super block + * @data: mount options + * + * init_nilfs() performs common initialization per block device (e.g. + * reading the super block, getting disk layout information, initializing + * shared fields in the_nilfs). It takes on some portion of the jobs + * typically done by a fill_super() routine. This division arises from + * the nature that multiple NILFS instances may be simultaneously + * mounted on a device. + * For multiple mounts on the same device, only the first mount + * invokes these tasks. + * + * Return Value: On success, 0 is returned. On error, a negative error + * code is returned. + */ +int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) +{ + struct super_block *sb = sbi->s_super; + struct buffer_head *sbh; + struct nilfs_super_block *sbp; + struct backing_dev_info *bdi; + int blocksize; + int err = 0; + + down_write(&nilfs->ns_sem); + if (nilfs_init(nilfs)) { + /* Load values from existing the_nilfs */ + sbp = nilfs->ns_sbp; + err = nilfs_store_magic_and_option(sb, sbp, data); + if (err) + goto out; + + blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); + if (sb->s_blocksize != blocksize && + !sb_set_blocksize(sb, blocksize)) { + printk(KERN_ERR "NILFS: blocksize %d unfit to device\n", + blocksize); + err = -EINVAL; + } + sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); + goto out; + } + + sbp = nilfs_load_super_block(sb, &sbh); + if (!sbp) { + err = -EINVAL; + goto out; + } + err = nilfs_store_magic_and_option(sb, sbp, data); + if (err) + goto failed_sbh; + + blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); + if (sb->s_blocksize != blocksize) { + sbp = nilfs_reload_super_block(sb, &sbh, blocksize); + if (!sbp) { + err = -EINVAL; + goto out; + /* not failed_sbh; sbh is released automatically + when reloading fails. */ + } + } + nilfs->ns_blocksize_bits = sb->s_blocksize_bits; + + err = nilfs_store_disk_layout(nilfs, sb, sbp); + if (err) + goto failed_sbh; + + sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); + + nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); + nilfs->ns_sbh = sbh; + nilfs->ns_sbp = sbp; + + bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; + if (!bdi) + bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; + nilfs->ns_bdi = bdi ? : &default_backing_dev_info; + + /* Finding last segment */ + nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); + nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno); + nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq); + + nilfs->ns_seg_seq = nilfs->ns_last_seq; + nilfs->ns_segnum = + nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); + nilfs->ns_cno = nilfs->ns_last_cno + 1; + if (nilfs->ns_segnum >= nilfs->ns_nsegments) { + printk(KERN_ERR "NILFS invalid last segment number.\n"); + err = -EINVAL; + goto failed_sbh; + } + /* Dummy values */ + nilfs->ns_free_segments_count = + nilfs->ns_nsegments - (nilfs->ns_segnum + 1); + + /* Initialize gcinode cache */ + err = nilfs_init_gccache(nilfs); + if (err) + goto failed_sbh; + + set_nilfs_init(nilfs); + err = 0; + out: + up_write(&nilfs->ns_sem); + return err; + + failed_sbh: + brelse(sbh); + goto out; +} + +int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) +{ + struct inode *dat = nilfs_dat_inode(nilfs); + unsigned long ncleansegs; + int err; + + down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs); + up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + if (likely(!err)) + *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; + return err; +} + +void nilfs_dispose_used_segments(struct the_nilfs *nilfs) +{ + struct nilfs_segment_entry *ent, *n; + + /* nilfs->sem must be locked by the caller. */ + if (!nilfs_loaded(nilfs)) + return; + + list_for_each_entry_safe(ent, n, &nilfs->ns_used_segments, list) { + list_del_init(&ent->list); + nilfs_segment_usage_clear_volatile_active(ent->raw_su); + nilfs_close_segment_entry(ent, nilfs->ns_sufile); + nilfs_free_segment_entry(ent); + } +} + +int nilfs_near_disk_full(struct the_nilfs *nilfs) +{ + struct inode *sufile = nilfs->ns_sufile; + unsigned long ncleansegs, nincsegs; + int ret; + + ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs); + if (likely(!ret)) { + nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / + nilfs->ns_blocks_per_segment + 1; + if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs) + ret++; + } + return ret; +} + +int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, + int snapshot_mount) +{ + struct nilfs_sb_info *sbi; + int ret = 0; + + down_read(&nilfs->ns_sem); + if (cno == 0 || cno > nilfs->ns_cno) + goto out_unlock; + + list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { + if (sbi->s_snapshot_cno == cno && + (!snapshot_mount || nilfs_test_opt(sbi, SNAPSHOT))) { + /* exclude read-only mounts */ + ret++; + break; + } + } + /* for protecting recent checkpoints */ + if (cno >= nilfs_last_cno(nilfs)) + ret++; + + out_unlock: + up_read(&nilfs->ns_sem); + return ret; +} |