diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-11-12 12:39:21 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-11-12 12:39:21 +0100 |
commit | 708b8eae0fd532af73ea8350e6dcc10255ff7376 (patch) | |
tree | f336436934fd79bc91aff7112a9beb10bc4e839f /fs | |
parent | d98d38f2014ab79f28c126ff175d034891f7aefc (diff) | |
parent | f21f237cf55494c3a4209de323281a3b0528da10 (diff) |
Merge branch 'linus' into core/locking
Diffstat (limited to 'fs')
69 files changed, 1328 insertions, 683 deletions
diff --git a/fs/Makefile b/fs/Makefile index 2168c902d5ca..d9f8afe6f0c4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -81,8 +81,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ obj-$(CONFIG_CODA_FS) += coda/ obj-$(CONFIG_MINIX_FS) += minix/ obj-$(CONFIG_FAT_FS) += fat/ -obj-$(CONFIG_MSDOS_FS) += msdos/ -obj-$(CONFIG_VFAT_FS) += vfat/ obj-$(CONFIG_BFS_FS) += bfs/ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 625abf5422e2..33bf8cbfd051 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -128,9 +128,10 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) */ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) { - int err = -EINVAL; + int err; - if (check_dev_ioctl_version(cmd, param)) { + err = check_dev_ioctl_version(cmd, param); + if (err) { AUTOFS_WARN("invalid device control module version " "supplied for cmd(0x%08x)", cmd); goto out; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cde2f8e8935a..4b6fb3f628c0 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -56,12 +56,23 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) mntget(mnt); dget(dentry); - if (!autofs4_follow_mount(&mnt, &dentry)) + if (!follow_down(&mnt, &dentry)) goto done; - /* This is an autofs submount, we can't expire it */ - if (is_autofs4_dentry(dentry)) - goto done; + if (is_autofs4_dentry(dentry)) { + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + + /* This is an autofs submount, we can't expire it */ + if (sbi->type == AUTOFS_TYPE_INDIRECT) + goto done; + + /* + * Otherwise it's an offset mount and we need to check + * if we can umount its mount, if there is one. + */ + if (!d_mountpoint(dentry)) + goto done; + } /* Update the expiry counter if fs is busy */ if (!may_umount_tree(mnt)) { diff --git a/fs/block_dev.c b/fs/block_dev.c index 88a776fa0ef6..db831efbdbbd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -986,7 +986,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; - struct hd_struct *part = NULL; int ret; int partno; int perm = 0; @@ -1004,24 +1003,25 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return ret; } - ret = -ENXIO; - lock_kernel(); + ret = -ENXIO; disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) goto out_unlock_kernel; - part = disk_get_part(disk, partno); - if (!part) - goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; - bdev->bd_part = part; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; + + ret = -ENXIO; + bdev->bd_part = disk_get_part(disk, partno); + if (!bdev->bd_part) + goto out_clear; + if (disk->fops->open) { ret = disk->fops->open(bdev, mode); if (ret) @@ -1049,18 +1049,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_contains = whole; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; + bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || - !part || !part->nr_sects) { + !bdev->bd_part || !bdev->bd_part->nr_sects) { ret = -ENXIO; goto out_clear; } - bd_set_size(bdev, (loff_t)part->nr_sects << 9); + bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } } else { - disk_put_part(part); put_disk(disk); module_put(disk->fops->owner); - part = NULL; disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { @@ -1080,6 +1079,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return 0; out_clear: + disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; @@ -1091,7 +1091,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) out_unlock_kernel: unlock_kernel(); - disk_put_part(part); if (disk) module_put(disk->fops->owner); put_disk(disk); diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 8f528ea24c48..8855331b2fba 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -4,7 +4,11 @@ Various fixes to make delete of open files behavior more predictable (when delete of an open file fails we mark the file as "delete-on-close" in a way that more servers accept, but only if we can first rename the file to a temporary name). Add experimental support for more safely -handling fcntl(F_SETLEASE). +handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp +sends, and also let tcp autotune the socket send and receive buffers. +This reduces the number of EAGAIN errors returned by TCP/IP in +high stress workloads (and the number of retries on socket writes +when sending large SMBWriteX requests). Version 1.54 ------------ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c791e5b5a914..1cb1189f24e0 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -141,6 +141,8 @@ struct TCP_Server_Info { char versionMajor; char versionMinor; bool svlocal:1; /* local server or remote */ + bool noblocksnd; /* use blocking sendmsg */ + bool noautotune; /* do not autotune send buf sizes */ atomic_t socketUseCount; /* number of open cifs sessions on socket */ atomic_t inFlight; /* number of requests on the wire to server */ #ifdef CONFIG_CIFS_STATS2 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 0cff7fe986e8..6f21ecb85ce5 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -36,7 +36,7 @@ extern void cifs_buf_release(void *); extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); extern int smb_send(struct socket *, struct smb_hdr *, - unsigned int /* length */ , struct sockaddr *); + unsigned int /* length */ , struct sockaddr *, bool); extern unsigned int _GetXid(void); extern void _FreeXid(unsigned int); #define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid)); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 843a85fb8b9a..d5eac48fc415 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1536,7 +1536,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, __u32 bytes_sent; __u16 byte_count; - /* cFYI(1,("write at %lld %d bytes",offset,count));*/ + /* cFYI(1, ("write at %lld %d bytes", offset, count));*/ if (tcon->ses == NULL) return -ECONNABORTED; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 71b7661e2260..e9f9248cb3fe 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -92,6 +92,8 @@ struct smb_vol { bool seal:1; /* request transport encryption on share */ bool nodfs:1; /* Do not request DFS, even if available */ bool local_lease:1; /* check leases only on local system, not remote */ + bool noblocksnd:1; + bool noautotune:1; unsigned int rsize; unsigned int wsize; unsigned int sockopt; @@ -102,9 +104,11 @@ struct smb_vol { static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, char *netb_name, - char *server_netb_name); + char *server_netb_name, + bool noblocksnd, + bool nosndbuf); /* ipv6 never set sndbuf size */ static int ipv6_connect(struct sockaddr_in6 *psin_server, - struct socket **csocket); + struct socket **csocket, bool noblocksnd); /* @@ -191,12 +195,13 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); if (server->protocolType == IPV6) { rc = ipv6_connect(&server->addr.sockAddr6, - &server->ssocket); + &server->ssocket, server->noautotune); } else { rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket, server->workstation_RFC1001_name, - server->server_RFC1001_name); + server->server_RFC1001_name, + server->noblocksnd, server->noautotune); } if (rc) { cFYI(1, ("reconnect error %d", rc)); @@ -1192,6 +1197,10 @@ cifs_parse_mount_options(char *options, const char *devname, /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { vol->rw = true; + } else if (strnicmp(data, "noblocksend", 11) == 0) { + vol->noblocksnd = 1; + } else if (strnicmp(data, "noautotune", 10) == 0) { + vol->noautotune = 1; } else if ((strnicmp(data, "suid", 4) == 0) || (strnicmp(data, "nosuid", 6) == 0) || (strnicmp(data, "exec", 4) == 0) || @@ -1518,7 +1527,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length) static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char *netbios_name, char *target_name) + char *netbios_name, char *target_name, + bool noblocksnd, bool noautotune) { int rc = 0; int connected = 0; @@ -1590,11 +1600,16 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, (*csocket)->sk->sk_sndbuf, (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + if (!noblocksnd) + (*csocket)->sk->sk_sndtimeo = 3 * HZ; + /* make the bufsizes depend on wsize/rsize and max requests */ - if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) - (*csocket)->sk->sk_sndbuf = 200 * 1024; - if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) - (*csocket)->sk->sk_rcvbuf = 140 * 1024; + if (noautotune) { + if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) + (*csocket)->sk->sk_sndbuf = 200 * 1024; + if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) + (*csocket)->sk->sk_rcvbuf = 140 * 1024; + } /* send RFC1001 sessinit */ if (psin_server->sin_port == htons(RFC1001_PORT)) { @@ -1631,7 +1646,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, /* sizeof RFC1002_SESSION_REQUEST with no scope */ smb_buf->smb_buf_length = 0x81000044; rc = smb_send(*csocket, smb_buf, 0x44, - (struct sockaddr *)psin_server); + (struct sockaddr *)psin_server, noblocksnd); kfree(ses_init_buf); msleep(1); /* RFC1001 layer in at least one server requires very short break before negprot @@ -1651,7 +1666,8 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, } static int -ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) +ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket, + bool noblocksnd) { int rc = 0; int connected = 0; @@ -1720,6 +1736,9 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) the default. sock_setsockopt not used because it expects user space buffer */ (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + if (!noblocksnd) + (*csocket)->sk->sk_sndtimeo = 3 * HZ; + return rc; } @@ -1983,11 +2002,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cFYI(1, ("attempting ipv6 connect")); /* BB should we allow ipv6 on port 139? */ /* other OS never observed in Wild doing 139 with v6 */ - rc = ipv6_connect(&sin_server6, &csocket); + rc = ipv6_connect(&sin_server6, &csocket, + volume_info.noblocksnd); } else rc = ipv4_connect(&sin_server, &csocket, volume_info.source_rfc1001_name, - volume_info.target_rfc1001_name); + volume_info.target_rfc1001_name, + volume_info.noblocksnd, + volume_info.noautotune); if (rc < 0) { cERROR(1, ("Error connecting to IPv4 socket. " "Aborting operation")); @@ -2002,6 +2024,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sock_release(csocket); goto out; } else { + srvTcp->noblocksnd = volume_info.noblocksnd; + srvTcp->noautotune = volume_info.noautotune; memcpy(&srvTcp->addr.sockAddr, &sin_server, sizeof(struct sockaddr_in)); atomic_set(&srvTcp->inFlight, 0); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 62d8bd8f14c0..ead1a3bb0256 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1824,7 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, pTcon = cifs_sb->tcon; pagevec_init(&lru_pvec, 0); - cFYI(DBG2, ("rpages: num pages %d", num_pages)); + cFYI(DBG2, ("rpages: num pages %d", num_pages)); for (i = 0; i < num_pages; ) { unsigned contig_pages; struct page *tmp_page; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d54fa8aeaea9..ff8c68de4a92 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1361,9 +1361,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc == 0 && (info_buf_source->UniqueId == - info_buf_target->UniqueId)) + info_buf_target->UniqueId)) { /* same file, POSIX says that this is a noop */ + rc = 0; goto cifs_rename_exit; + } } /* else ... BB we could add the same check for Windows by checking the UniqueId via FILE_INTERNAL_INFO */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bf0e6d8e382a..ff8243a8fe3e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -161,7 +161,7 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon) int smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, - unsigned int smb_buf_length, struct sockaddr *sin) + unsigned int smb_buf_length, struct sockaddr *sin, bool noblocksnd) { int rc = 0; int i = 0; @@ -178,7 +178,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; - smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ + if (noblocksnd) + smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg.msg_flags = MSG_NOSIGNAL; /* smb header is converted in header_assemble. bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in @@ -229,8 +232,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, } static int -smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, - struct sockaddr *sin) +smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec, + struct sockaddr *sin, bool noblocksnd) { int rc = 0; int i = 0; @@ -240,6 +243,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, unsigned int total_len; int first_vec = 0; unsigned int smb_buf_length = smb_buffer->smb_buf_length; + struct socket *ssocket = server->ssocket; if (ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ @@ -248,7 +252,10 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; - smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ + if (noblocksnd) + smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg.msg_flags = MSG_NOSIGNAL; /* smb header is converted in header_assemble. bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in @@ -283,8 +290,11 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, if (rc < 0) break; - if (rc >= total_len) { - WARN_ON(rc > total_len); + if (rc == total_len) { + total_len = 0; + break; + } else if (rc > total_len) { + cERROR(1, ("sent %d requested %d", rc, total_len)); break; } if (rc == 0) { @@ -312,6 +322,16 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, i = 0; /* in case we get ENOSPC on the next send */ } + if ((total_len > 0) && (total_len != smb_buf_length + 4)) { + cFYI(1, ("partial send (%d remaining), terminating session", + total_len)); + /* If we have only sent part of an SMB then the next SMB + could be taken as the remainder of this one. We need + to kill the socket so the server throws away the partial + SMB */ + server->tcpStatus = CifsNeedReconnect; + } + if (rc < 0) { cERROR(1, ("Error %d sending data on socket to server", rc)); } else @@ -518,8 +538,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, #ifdef CONFIG_CIFS_STATS2 atomic_inc(&ses->server->inSend); #endif - rc = smb_send2(ses->server->ssocket, iov, n_vec, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + rc = smb_send2(ses->server, iov, n_vec, + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; @@ -711,7 +732,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, atomic_inc(&ses->server->inSend); #endif rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; @@ -851,7 +873,8 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, return rc; } rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); up(&ses->server->tcpSem); return rc; } @@ -941,7 +964,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, atomic_inc(&ses->server->inSend); #endif rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index cfd29da714d1..0376ac66c44a 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -2,7 +2,7 @@ * An implementation of a loadable kernel mode driver providing * multiple kernel/user space bidirectional communications links. * - * Author: Alan Cox <alan@redhat.com> + * Author: Alan Cox <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 06db79d05c12..6046239465a1 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1251,6 +1251,7 @@ struct kmem_cache *ecryptfs_header_cache_2; /** * ecryptfs_write_headers_virt * @page_virt: The virtual address to write the headers to + * @max: The size of memory allocated at page_virt * @size: Set to the number of bytes written by this function * @crypt_stat: The cryptographic context * @ecryptfs_dentry: The eCryptfs dentry @@ -1278,7 +1279,8 @@ struct kmem_cache *ecryptfs_header_cache_2; * * Returns zero on success */ -static int ecryptfs_write_headers_virt(char *page_virt, size_t *size, +static int ecryptfs_write_headers_virt(char *page_virt, size_t max, + size_t *size, struct ecryptfs_crypt_stat *crypt_stat, struct dentry *ecryptfs_dentry) { @@ -1296,7 +1298,7 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t *size, offset += written; rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat, ecryptfs_dentry, &written, - PAGE_CACHE_SIZE - offset); + max - offset); if (rc) ecryptfs_printk(KERN_WARNING, "Error generating key packet " "set; rc = [%d]\n", rc); @@ -1368,14 +1370,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) goto out; } /* Released in this function */ - virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); + virt = (char *)get_zeroed_page(GFP_KERNEL); if (!virt) { printk(KERN_ERR "%s: Out of memory\n", __func__); rc = -ENOMEM; goto out; } - rc = ecryptfs_write_headers_virt(virt, &size, crypt_stat, - ecryptfs_dentry); + rc = ecryptfs_write_headers_virt(virt, PAGE_CACHE_SIZE, &size, + crypt_stat, ecryptfs_dentry); if (unlikely(rc)) { printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", __func__, rc); @@ -1393,8 +1395,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) goto out_free; } out_free: - memset(virt, 0, crypt_stat->num_header_bytes_at_front); - kfree(virt); + free_page((unsigned long)virt); out: return rc; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 18eaa78ecb4e..5dec6d1356c4 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -281,7 +281,8 @@ void ext3_abort (struct super_block * sb, const char * function, EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; sb->s_flags |= MS_RDONLY; EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; - journal_abort(EXT3_SB(sb)->s_journal, -EIO); + if (EXT3_SB(sb)->s_journal) + journal_abort(EXT3_SB(sb)->s_journal, -EIO); } void ext3_warning (struct super_block * sb, const char * function, @@ -390,11 +391,14 @@ static void ext3_put_super (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); struct ext3_super_block *es = sbi->s_es; - int i; + int i, err; ext3_xattr_put_super(sb); - if (journal_destroy(sbi->s_journal) < 0) + err = journal_destroy(sbi->s_journal); + sbi->s_journal = NULL; + if (err < 0) ext3_abort(sb, __func__, "Couldn't clean up the journal"); + if (!(sb->s_flags & MS_RDONLY)) { EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); @@ -2386,13 +2390,12 @@ static void ext3_write_super (struct super_block * sb) static int ext3_sync_fs(struct super_block *sb, int wait) { - tid_t target; - sb->s_dirt = 0; - if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { - if (wait) - log_wait_commit(EXT3_SB(sb)->s_journal, target); - } + if (wait) + ext3_force_commit(sb); + else + journal_start_commit(EXT3_SB(sb)->s_journal, NULL); + return 0; } diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index b9821be709bd..d2003cdc36aa 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -589,21 +589,23 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, return; } -int ext4_claim_free_blocks(struct ext4_sb_info *sbi, - s64 nblocks) +/** + * ext4_has_free_blocks() + * @sbi: in-core super block structure. + * @nblocks: number of needed blocks + * + * Check if filesystem has nblocks free & available for allocation. + * On success return 1, return 0 on failure. + */ +int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) { - s64 free_blocks, dirty_blocks; - s64 root_blocks = 0; + s64 free_blocks, dirty_blocks, root_blocks; struct percpu_counter *fbc = &sbi->s_freeblocks_counter; struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; free_blocks = percpu_counter_read_positive(fbc); dirty_blocks = percpu_counter_read_positive(dbc); - - if (!capable(CAP_SYS_RESOURCE) && - sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) - root_blocks = ext4_r_blocks_count(sbi->s_es); + root_blocks = ext4_r_blocks_count(sbi->s_es); if (free_blocks - (nblocks + root_blocks + dirty_blocks) < EXT4_FREEBLOCKS_WATERMARK) { @@ -616,57 +618,32 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi, } } /* Check whether we have space after - * accounting for current dirty blocks + * accounting for current dirty blocks & root reserved blocks. */ - if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) - /* we don't have free space */ - return -ENOSPC; + if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks)) + return 1; + + /* Hm, nope. Are (enough) root reserved blocks available? */ + if (sbi->s_resuid == current->fsuid || + ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || + capable(CAP_SYS_RESOURCE)) { + if (free_blocks >= (nblocks + dirty_blocks)) + return 1; + } - /* Add the blocks to nblocks */ - percpu_counter_add(dbc, nblocks); return 0; } -/** - * ext4_has_free_blocks() - * @sbi: in-core super block structure. - * @nblocks: number of neeed blocks - * - * Check if filesystem has free blocks available for allocation. - * Return the number of blocks avaible for allocation for this request - * On success, return nblocks - */ -ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, +int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) { - s64 free_blocks, dirty_blocks; - s64 root_blocks = 0; - struct percpu_counter *fbc = &sbi->s_freeblocks_counter; - struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; - - free_blocks = percpu_counter_read_positive(fbc); - dirty_blocks = percpu_counter_read_positive(dbc); - - if (!capable(CAP_SYS_RESOURCE) && - sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) - root_blocks = ext4_r_blocks_count(sbi->s_es); - - if (free_blocks - (nblocks + root_blocks + dirty_blocks) < - EXT4_FREEBLOCKS_WATERMARK) { - free_blocks = percpu_counter_sum(fbc); - dirty_blocks = percpu_counter_sum(dbc); - } - if (free_blocks <= (root_blocks + dirty_blocks)) - /* we don't have free space */ + if (ext4_has_free_blocks(sbi, nblocks)) { + percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks); return 0; - - if (free_blocks - (root_blocks + dirty_blocks) < nblocks) - return free_blocks - (root_blocks + dirty_blocks); - return nblocks; + } else + return -ENOSPC; } - /** * ext4_should_retry_alloc() * @sb: super block diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4880cc3e6727..b0537c827024 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1003,8 +1003,7 @@ extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, ext4_fsblk_t goal, unsigned long *count, int *errp); extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); -extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, - s64 nblocks); +extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); extern void ext4_free_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t block, unsigned long count, int metadata); extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index fe34d74cfb19..2a117e286e54 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -718,6 +718,8 @@ got: gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); free = ext4_free_blocks_after_init(sb, group, gdp); gdp->bg_free_blocks_count = cpu_to_le16(free); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, + gdp); } spin_unlock(sb_bgl_lock(sbi, group)); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8dbf6953845b..be21a5ae33cb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2329,6 +2329,8 @@ static int ext4_da_writepage(struct page *page, unlock_page(page); return 0; } + /* now mark the buffer_heads as dirty and uptodate */ + block_commit_write(page, 0, PAGE_CACHE_SIZE); } if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) @@ -4580,9 +4582,10 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) { if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) - return ext4_indirect_trans_blocks(inode, nrblocks, 0); - return ext4_ext_index_trans_blocks(inode, nrblocks, 0); + return ext4_indirect_trans_blocks(inode, nrblocks, chunk); + return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); } + /* * Account for index blocks, block groups bitmaps and block group * descriptor blocks if modify datablocks and index blocks diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index dfe17a134052..444ad998f72e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4441,6 +4441,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, else if (block >= (entry->start_blk + entry->count)) n = &(*n)->rb_right; else { + ext4_unlock_group(sb, group); ext4_error(sb, __func__, "Double free of blocks %d (%d %d)\n", block, entry->start_blk, entry->count); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bdddea14e782..e4a241c65dbe 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -333,7 +333,8 @@ void ext4_abort(struct super_block *sb, const char *function, EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; sb->s_flags |= MS_RDONLY; EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); + if (EXT4_SB(sb)->s_journal) + jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); } void ext4_warning(struct super_block *sb, const char *function, @@ -442,14 +443,16 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - int i; + int i, err; ext4_mb_release(sb); ext4_ext_release(sb); ext4_xattr_put_super(sb); - if (jbd2_journal_destroy(sbi->s_journal) < 0) - ext4_abort(sb, __func__, "Couldn't clean up the journal"); + err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; + if (err < 0) + ext4_abort(sb, __func__, "Couldn't clean up the journal"); + if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); @@ -1455,9 +1458,8 @@ static int ext4_fill_flex_info(struct super_block *sb) /* We allocate both existing and potentially added groups */ flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + - ((sbi->s_es->s_reserved_gdt_blocks +1 ) << - EXT4_DESC_PER_BLOCK_BITS(sb))) / - groups_per_flex; + ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << + EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; sbi->s_flex_groups = kzalloc(flex_group_count * sizeof(struct flex_groups), GFP_KERNEL); if (sbi->s_flex_groups == NULL) { @@ -2882,12 +2884,9 @@ int ext4_force_commit(struct super_block *sb) /* * Ext4 always journals updates to the superblock itself, so we don't * have to propagate any other updates to the superblock on disk at this - * point. Just start an async writeback to get the buffers on their way - * to the disk. - * - * This implicitly triggers the writebehind on sync(). + * point. (We can probably nuke this function altogether, and remove + * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...) */ - static void ext4_write_super(struct super_block *sb) { if (mutex_trylock(&sb->s_lock) != 0) @@ -2897,15 +2896,15 @@ static void ext4_write_super(struct super_block *sb) static int ext4_sync_fs(struct super_block *sb, int wait) { - tid_t target; + int ret = 0; trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); sb->s_dirt = 0; - if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { - if (wait) - jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); - } - return 0; + if (wait) + ret = ext4_force_commit(sb); + else + jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); + return ret; } /* diff --git a/fs/fat/Makefile b/fs/fat/Makefile index bfb5f06cf2c8..e06190322c1c 100644 --- a/fs/fat/Makefile +++ b/fs/fat/Makefile @@ -3,5 +3,9 @@ # obj-$(CONFIG_FAT_FS) += fat.o +obj-$(CONFIG_VFAT_FS) += vfat.o +obj-$(CONFIG_MSDOS_FS) += msdos.o -fat-objs := cache.o dir.o fatent.o file.o inode.o misc.o +fat-y := cache.o dir.o fatent.o file.o inode.o misc.o +vfat-y := namei_vfat.o +msdos-y := namei_msdos.o diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 3222f51c41cf..b42602298087 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -9,8 +9,8 @@ */ #include <linux/fs.h> -#include <linux/msdos_fs.h> #include <linux/buffer_head.h> +#include "fat.h" /* this must be > 0. */ #define FAT_MAX_CACHE 8 @@ -293,10 +293,12 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) } int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks) + unsigned long *mapped_blocks, int create) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); + const unsigned long blocksize = sb->s_blocksize; + const unsigned char blocksize_bits = sb->s_blocksize_bits; sector_t last_block; int cluster, offset; @@ -309,10 +311,21 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, } return 0; } - last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) - >> sb->s_blocksize_bits; - if (sector >= last_block) - return 0; + + last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits; + if (sector >= last_block) { + if (!create) + return 0; + + /* + * ->mmu_private can access on only allocation path. + * (caller must hold ->i_mutex) + */ + last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1)) + >> blocksize_bits; + if (sector >= last_block) + return 0; + } cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); offset = sector & (sbi->sec_per_clus - 1); diff --git a/fs/fat/dir.c b/fs/fat/dir.c index bae1c3292522..67e058357098 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,11 +16,11 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/time.h> -#include <linux/msdos_fs.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/compat.h> #include <asm/uaccess.h> +#include "fat.h" static inline loff_t fat_make_i_pos(struct super_block *sb, struct buffer_head *bh, @@ -77,7 +77,7 @@ next: *bh = NULL; iblock = *pos >> sb->s_blocksize_bits; - err = fat_bmap(dir, iblock, &phys, &mapped_blocks); + err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0); if (err || !phys) return -1; /* beyond EOF or error */ @@ -86,7 +86,7 @@ next: *bh = sb_bread(sb, phys); if (*bh == NULL) { printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", - (unsigned long long)phys); + (llu)phys); /* skip this block */ *pos = (iblock + 1) << sb->s_blocksize_bits; goto next; @@ -373,9 +373,10 @@ parse_record: if (de->attr == ATTR_EXT) { int status = fat_parse_long(inode, &cpos, &bh, &de, &unicode, &nr_slots); - if (status < 0) - return status; - else if (status == PARSE_INVALID) + if (status < 0) { + err = status; + goto end_of_dir; + } else if (status == PARSE_INVALID) continue; else if (status == PARSE_NOT_LONGNAME) goto parse_record; @@ -832,6 +833,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, #endif /* CONFIG_COMPAT */ const struct file_operations fat_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = fat_readdir, .ioctl = fat_dir_ioctl, @@ -1089,6 +1091,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) struct msdos_dir_entry *de; sector_t blknr; __le16 date, time; + u8 time_cs; int err, cluster; err = fat_alloc_clusters(dir, &cluster, 1); @@ -1102,7 +1105,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) goto error_free; } - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, &time_cs); de = (struct msdos_dir_entry *)bhs[0]->b_data; /* filling the new directory slots ("." and ".." entries) */ @@ -1112,13 +1115,14 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) de[0].lcase = de[1].lcase = 0; de[0].time = de[1].time = time; de[0].date = de[1].date = date; - de[0].ctime_cs = de[1].ctime_cs = 0; if (sbi->options.isvfat) { /* extra timestamps */ de[0].ctime = de[1].ctime = time; + de[0].ctime_cs = de[1].ctime_cs = time_cs; de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = date; } else { de[0].ctime = de[1].ctime = 0; + de[0].ctime_cs = de[1].ctime_cs = 0; de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; } de[0].start = cpu_to_le16(cluster); diff --git a/fs/fat/fat.h b/fs/fat/fat.h new file mode 100644 index 000000000000..ea440d65819c --- /dev/null +++ b/fs/fat/fat.h @@ -0,0 +1,329 @@ +#ifndef _FAT_H +#define _FAT_H + +#include <linux/buffer_head.h> +#include <linux/string.h> +#include <linux/nls.h> +#include <linux/fs.h> +#include <linux/mutex.h> +#include <linux/msdos_fs.h> + +/* + * vfat shortname flags + */ +#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ +#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ +#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ +#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ +#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ + +struct fat_mount_options { + uid_t fs_uid; + gid_t fs_gid; + unsigned short fs_fmask; + unsigned short fs_dmask; + unsigned short codepage; /* Codepage for shortname conversions */ + char *iocharset; /* Charset used for filename input/display */ + unsigned short shortname; /* flags for shortname display/create rule */ + unsigned char name_check; /* r = relaxed, n = normal, s = strict */ + unsigned short allow_utime;/* permission for setting the [am]time */ + unsigned quiet:1, /* set = fake successful chmods and chowns */ + showexec:1, /* set = only set x bit for com/exe/bat */ + sys_immutable:1, /* set = system files are immutable */ + dotsOK:1, /* set = hidden and system files are named '.filename' */ + isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ + utf8:1, /* Use of UTF-8 character set (Default) */ + unicode_xlate:1, /* create escape sequences for unhandled Unicode */ + numtail:1, /* Does first alias have a numeric '~1' type tail? */ + flush:1, /* write things quickly */ + nocase:1, /* Does this need case conversion? 0=need case conversion*/ + usefree:1, /* Use free_clusters for FAT32 */ + tz_utc:1, /* Filesystem timestamps are in UTC */ + rodir:1; /* allow ATTR_RO for directory */ +}; + +#define FAT_HASH_BITS 8 +#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) + +/* + * MS-DOS file system in-core superblock data + */ +struct msdos_sb_info { + unsigned short sec_per_clus; /* sectors/cluster */ + unsigned short cluster_bits; /* log2(cluster_size) */ + unsigned int cluster_size; /* cluster size */ + unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ + unsigned short fat_start; + unsigned long fat_length; /* FAT start & length (sec.) */ + unsigned long dir_start; + unsigned short dir_entries; /* root dir start & entries */ + unsigned long data_start; /* first data sector */ + unsigned long max_cluster; /* maximum cluster number */ + unsigned long root_cluster; /* first cluster of the root directory */ + unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ + struct mutex fat_lock; + unsigned int prev_free; /* previously allocated cluster number */ + unsigned int free_clusters; /* -1 if undefined */ + unsigned int free_clus_valid; /* is free_clusters valid? */ + struct fat_mount_options options; + struct nls_table *nls_disk; /* Codepage used on disk */ + struct nls_table *nls_io; /* Charset used for input and display */ + const void *dir_ops; /* Opaque; default directory operations */ + int dir_per_block; /* dir entries per block */ + int dir_per_block_bits; /* log2(dir_per_block) */ + + int fatent_shift; + struct fatent_operations *fatent_ops; + + spinlock_t inode_hash_lock; + struct hlist_head inode_hashtable[FAT_HASH_SIZE]; +}; + +#define FAT_CACHE_VALID 0 /* special case for valid cache */ + +/* + * MS-DOS file system inode data in memory + */ +struct msdos_inode_info { + spinlock_t cache_lru_lock; + struct list_head cache_lru; + int nr_caches; + /* for avoiding the race between fat_free() and fat_get_cluster() */ + unsigned int cache_valid_id; + + /* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */ + loff_t mmu_private; /* physically allocated size */ + + int i_start; /* first cluster or 0 */ + int i_logstart; /* logical first cluster */ + int i_attrs; /* unused attribute bits */ + loff_t i_pos; /* on-disk position of directory entry or 0 */ + struct hlist_node i_fat_hash; /* hash by i_location */ + struct inode vfs_inode; +}; + +struct fat_slot_info { + loff_t i_pos; /* on-disk position of directory entry */ + loff_t slot_off; /* offset for slot or de start */ + int nr_slots; /* number of slots + 1(de) in filename */ + struct msdos_dir_entry *de; + struct buffer_head *bh; +}; + +static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) +{ + return container_of(inode, struct msdos_inode_info, vfs_inode); +} + +/* + * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to + * save ATTR_RO instead of ->i_mode. + * + * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only + * bit, it's just used as flag for app. + */ +static inline int fat_mode_can_hold_ro(struct inode *inode) +{ + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + mode_t mask; + + if (S_ISDIR(inode->i_mode)) { + if (!sbi->options.rodir) + return 0; + mask = ~sbi->options.fs_dmask; + } else + mask = ~sbi->options.fs_fmask; + + if (!(mask & S_IWUGO)) + return 0; + return 1; +} + +/* Convert attribute bits and a mask to the UNIX mode. */ +static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, + u8 attrs, mode_t mode) +{ + if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir)) + mode &= ~S_IWUGO; + + if (attrs & ATTR_DIR) + return (mode & ~sbi->options.fs_dmask) | S_IFDIR; + else + return (mode & ~sbi->options.fs_fmask) | S_IFREG; +} + +/* Return the FAT attribute byte for this inode */ +static inline u8 fat_make_attrs(struct inode *inode) +{ + u8 attrs = MSDOS_I(inode)->i_attrs; + if (S_ISDIR(inode->i_mode)) + attrs |= ATTR_DIR; + if (fat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO)) + attrs |= ATTR_RO; + return attrs; +} + +static inline void fat_save_attrs(struct inode *inode, u8 attrs) +{ + if (fat_mode_can_hold_ro(inode)) + MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; + else + MSDOS_I(inode)->i_attrs = attrs & (ATTR_UNUSED | ATTR_RO); +} + +static inline unsigned char fat_checksum(const __u8 *name) +{ + unsigned char s = name[0]; + s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; + s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; + s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; + s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; + s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; + return s; +} + +static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) +{ + return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus + + sbi->data_start; +} + +static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + *dst++ = src[0] | (src[1] << 8); + src += 2; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + dst[0] = *src & 0x00FF; + dst[1] = (*src & 0xFF00) >> 8; + dst += 2; + src++; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +/* fat/cache.c */ +extern void fat_cache_inval_inode(struct inode *inode); +extern int fat_get_cluster(struct inode *inode, int cluster, + int *fclus, int *dclus); +extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks, int create); + +/* fat/dir.c */ +extern const struct file_operations fat_dir_operations; +extern int fat_search_long(struct inode *inode, const unsigned char *name, + int name_len, struct fat_slot_info *sinfo); +extern int fat_dir_empty(struct inode *dir); +extern int fat_subdirs(struct inode *dir); +extern int fat_scan(struct inode *dir, const unsigned char *name, + struct fat_slot_info *sinfo); +extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, + struct msdos_dir_entry **de, loff_t *i_pos); +extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); +extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, + struct fat_slot_info *sinfo); +extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); + +/* fat/fatent.c */ +struct fat_entry { + int entry; + union { + u8 *ent12_p[2]; + __le16 *ent16_p; + __le32 *ent32_p; + } u; + int nr_bhs; + struct buffer_head *bhs[2]; +}; + +static inline void fatent_init(struct fat_entry *fatent) +{ + fatent->nr_bhs = 0; + fatent->entry = 0; + fatent->u.ent32_p = NULL; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +static inline void fatent_set_entry(struct fat_entry *fatent, int entry) +{ + fatent->entry = entry; + fatent->u.ent32_p = NULL; +} + +static inline void fatent_brelse(struct fat_entry *fatent) +{ + int i; + fatent->u.ent32_p = NULL; + for (i = 0; i < fatent->nr_bhs; i++) + brelse(fatent->bhs[i]); + fatent->nr_bhs = 0; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +extern void fat_ent_access_init(struct super_block *sb); +extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, + int entry); +extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, + int new, int wait); +extern int fat_alloc_clusters(struct inode *inode, int *cluster, + int nr_cluster); +extern int fat_free_clusters(struct inode *inode, int cluster); +extern int fat_count_free_clusters(struct super_block *sb); + +/* fat/file.c */ +extern int fat_generic_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +extern const struct file_operations fat_file_operations; +extern const struct inode_operations fat_file_inode_operations; +extern int fat_setattr(struct dentry * dentry, struct iattr * attr); +extern void fat_truncate(struct inode *inode); +extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); + +/* fat/inode.c */ +extern void fat_attach(struct inode *inode, loff_t i_pos); +extern void fat_detach(struct inode *inode); +extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); +extern struct inode *fat_build_inode(struct super_block *sb, + struct msdos_dir_entry *de, loff_t i_pos); +extern int fat_sync_inode(struct inode *inode); +extern int fat_fill_super(struct super_block *sb, void *data, int silent, + const struct inode_operations *fs_dir_inode_ops, int isvfat); + +extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, + struct inode *i2); +/* fat/misc.c */ +extern void fat_fs_panic(struct super_block *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))) __cold; +extern void fat_clusters_flush(struct super_block *sb); +extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); +extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 __time, __le16 __date, u8 time_cs); +extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 *time, __le16 *date, u8 *time_cs); +extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); + +int fat_cache_init(void); +void fat_cache_destroy(void); + +/* helper for printk */ +typedef unsigned long long llu; + +#endif /* !_FAT_H */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index fb98b3d847ed..da6eea47872f 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -7,6 +7,7 @@ #include <linux/fs.h> #include <linux/msdos_fs.h> #include <linux/blkdev.h> +#include "fat.h" struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); @@ -92,8 +93,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, err_brelse: brelse(bhs[0]); err: - printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", - (unsigned long long)blocknr); + printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr); return -EIO; } @@ -106,7 +106,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", - (unsigned long long)blocknr); + (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; @@ -316,10 +316,20 @@ static inline int fat_ent_update_ptr(struct super_block *sb, /* Is this fatent's blocks including this entry? */ if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) return 0; - /* Does this entry need the next block? */ - if (sbi->fat_bits == 12 && (offset + 1) >= sb->s_blocksize) { - if (fatent->nr_bhs != 2 || bhs[1]->b_blocknr != (blocknr + 1)) - return 0; + if (sbi->fat_bits == 12) { + if ((offset + 1) < sb->s_blocksize) { + /* This entry is on bhs[0]. */ + if (fatent->nr_bhs == 2) { + brelse(bhs[1]); + fatent->nr_bhs = 1; + } + } else { + /* This entry needs the next block. */ + if (fatent->nr_bhs != 2) + return 0; + if (bhs[1]->b_blocknr != (blocknr + 1)) + return 0; + } } ops->ent_set_ptr(fatent, offset); return 1; diff --git a/fs/fat/file.c b/fs/fat/file.c index ddde37025ca6..f06a4e525ece 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -10,13 +10,13 @@ #include <linux/module.h> #include <linux/mount.h> #include <linux/time.h> -#include <linux/msdos_fs.h> #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/blkdev.h> #include <linux/fsnotify.h> #include <linux/security.h> +#include "fat.h" int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -29,10 +29,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, { u32 attr; - if (inode->i_ino == MSDOS_ROOT_INO) - attr = ATTR_DIR; - else - attr = fat_attr(inode); + mutex_lock(&inode->i_mutex); + attr = fat_make_attrs(inode); + mutex_unlock(&inode->i_mutex); return put_user(attr, user_attr); } @@ -62,20 +61,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, /* Merge in ATTR_VOLUME and ATTR_DIR */ attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | (is_dir ? ATTR_DIR : 0); - oldattr = fat_attr(inode); + oldattr = fat_make_attrs(inode); /* Equivalent to a chmod() */ ia.ia_valid = ATTR_MODE | ATTR_CTIME; ia.ia_ctime = current_fs_time(inode->i_sb); - if (is_dir) { - ia.ia_mode = MSDOS_MKMODE(attr, - S_IRWXUGO & ~sbi->options.fs_dmask) - | S_IFDIR; - } else { - ia.ia_mode = MSDOS_MKMODE(attr, - (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)) - & ~sbi->options.fs_fmask) - | S_IFREG; + if (is_dir) + ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); + else { + ia.ia_mode = fat_make_mode(sbi, attr, + S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); } /* The root directory has no attributes */ @@ -115,7 +110,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, inode->i_flags &= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; + fat_save_attrs(inode, attr); mark_inode_dirty(inode); up: mnt_drop_write(filp->f_path.mnt); @@ -274,7 +269,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, /* * Note, the basic check is already done by a caller of - * (attr->ia_mode & ~MSDOS_VALID_MODE) + * (attr->ia_mode & ~FAT_VALID_MODE) */ if (S_ISREG(inode->i_mode)) @@ -287,11 +282,18 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, /* * Of the r and x bits, all (subject to umask) must be present. Of the * w bits, either all (subject to umask) or none must be present. + * + * If fat_mode_can_hold_ro(inode) is false, can't change w bits. */ if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) return -EPERM; - if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) - return -EPERM; + if (fat_mode_can_hold_ro(inode)) { + if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) + return -EPERM; + } else { + if ((perm & S_IWUGO) != (S_IWUGO & ~mask)) + return -EPERM; + } *mode_ptr &= S_IFMT | perm; @@ -314,13 +316,15 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) } #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) +/* valid file mode bits */ +#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) int fat_setattr(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); struct inode *inode = dentry->d_inode; - int error = 0; unsigned int ia_valid; + int error; /* * Expand the file. Since inode_setattr() updates ->i_size @@ -356,7 +360,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) ((attr->ia_valid & ATTR_GID) && (attr->ia_gid != sbi->options.fs_gid)) || ((attr->ia_valid & ATTR_MODE) && - (attr->ia_mode & ~MSDOS_VALID_MODE))) + (attr->ia_mode & ~FAT_VALID_MODE))) error = -EPERM; if (error) { @@ -374,7 +378,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid &= ~ATTR_MODE; } - error = inode_setattr(inode, attr); + if (attr->ia_valid) + error = inode_setattr(inode, attr); out: return error; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 19eafbe3c379..bdd8fb7be2ca 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -16,7 +16,6 @@ #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/seq_file.h> -#include <linux/msdos_fs.h> #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/buffer_head.h> @@ -27,7 +26,9 @@ #include <linux/uio.h> #include <linux/writeback.h> #include <linux/log2.h> +#include <linux/hash.h> #include <asm/unaligned.h> +#include "fat.h" #ifndef CONFIG_FAT_DEFAULT_IOCHARSET /* if user don't select VFAT, this is undefined. */ @@ -63,7 +64,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, sector_t phys; int err, offset; - err = fat_bmap(inode, iblock, &phys, &mapped_blocks); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create); if (err) return err; if (phys) { @@ -93,7 +94,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, *max_blocks = min(mapped_blocks, *max_blocks); MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; - err = fat_bmap(inode, iblock, &phys, &mapped_blocks); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create); if (err) return err; @@ -175,7 +176,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE) { /* - * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), + * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), * so we need to update the ->mmu_private to block boundary. * * But we must fill the remaining area or hole by nul for @@ -198,7 +199,14 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, static sector_t _fat_bmap(struct address_space *mapping, sector_t block) { - return generic_block_bmap(mapping, block, fat_get_block); + sector_t blocknr; + + /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ + mutex_lock(&mapping->host->i_mutex); + blocknr = generic_block_bmap(mapping, block, fat_get_block); + mutex_unlock(&mapping->host->i_mutex); + + return blocknr; } static const struct address_space_operations fat_aops = { @@ -247,25 +255,21 @@ static void fat_hash_init(struct super_block *sb) INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); } -static inline unsigned long fat_hash(struct super_block *sb, loff_t i_pos) +static inline unsigned long fat_hash(loff_t i_pos) { - unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb; - tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2); - return tmp & FAT_HASH_MASK; + return hash_32(i_pos, FAT_HASH_BITS); } void fat_attach(struct inode *inode, loff_t i_pos) { - struct super_block *sb = inode->i_sb; - struct msdos_sb_info *sbi = MSDOS_SB(sb); + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); spin_lock(&sbi->inode_hash_lock); MSDOS_I(inode)->i_pos = i_pos; - hlist_add_head(&MSDOS_I(inode)->i_fat_hash, - sbi->inode_hashtable + fat_hash(sb, i_pos)); + hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head); spin_unlock(&sbi->inode_hash_lock); } - EXPORT_SYMBOL_GPL(fat_attach); void fat_detach(struct inode *inode) @@ -276,13 +280,12 @@ void fat_detach(struct inode *inode) hlist_del_init(&MSDOS_I(inode)->i_fat_hash); spin_unlock(&sbi->inode_hash_lock); } - EXPORT_SYMBOL_GPL(fat_detach); struct inode *fat_iget(struct super_block *sb, loff_t i_pos) { struct msdos_sb_info *sbi = MSDOS_SB(sb); - struct hlist_head *head = sbi->inode_hashtable + fat_hash(sb, i_pos); + struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); struct hlist_node *_p; struct msdos_inode_info *i; struct inode *inode = NULL; @@ -341,8 +344,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { inode->i_generation &= ~1; - inode->i_mode = MSDOS_MKMODE(de->attr, - S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; @@ -359,10 +361,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_nlink = fat_subdirs(inode); } else { /* not a directory */ inode->i_generation |= 1; - inode->i_mode = MSDOS_MKMODE(de->attr, - ((sbi->options.showexec && !is_exec(de->name + 8)) - ? S_IRUGO|S_IWUGO : S_IRWXUGO) - & ~sbi->options.fs_fmask) | S_IFREG; + inode->i_mode = fat_make_mode(sbi, de->attr, + ((sbi->options.showexec && !is_exec(de->name + 8)) + ? S_IRUGO|S_IWUGO : S_IRWXUGO)); MSDOS_I(inode)->i_start = le16_to_cpu(de->start); if (sbi->fat_bits == 32) MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); @@ -378,25 +379,16 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if (sbi->options.sys_immutable) inode->i_flags |= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; + fat_save_attrs(inode, de->attr); + inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; - inode->i_mtime.tv_sec = - date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date), - sbi->options.tz_utc); - inode->i_mtime.tv_nsec = 0; + + fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0); if (sbi->options.isvfat) { - int secs = de->ctime_cs / 100; - int csecs = de->ctime_cs % 100; - inode->i_ctime.tv_sec = - date_dos2unix(le16_to_cpu(de->ctime), - le16_to_cpu(de->cdate), - sbi->options.tz_utc) + secs; - inode->i_ctime.tv_nsec = csecs * 10000000; - inode->i_atime.tv_sec = - date_dos2unix(0, le16_to_cpu(de->adate), - sbi->options.tz_utc); - inode->i_atime.tv_nsec = 0; + fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime, + de->cdate, de->ctime_cs); + fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0); } else inode->i_ctime = inode->i_atime = inode->i_mtime; @@ -443,13 +435,8 @@ static void fat_delete_inode(struct inode *inode) static void fat_clear_inode(struct inode *inode) { - struct super_block *sb = inode->i_sb; - struct msdos_sb_info *sbi = MSDOS_SB(sb); - - spin_lock(&sbi->inode_hash_lock); fat_cache_inval_inode(inode); - hlist_del_init(&MSDOS_I(inode)->i_fat_hash); - spin_unlock(&sbi->inode_hash_lock); + fat_detach(inode); } static void fat_write_super(struct super_block *sb) @@ -555,6 +542,20 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, + struct inode *inode) +{ + loff_t i_pos; +#if BITS_PER_LONG == 32 + spin_lock(&sbi->inode_hash_lock); +#endif + i_pos = MSDOS_I(inode)->i_pos; +#if BITS_PER_LONG == 32 + spin_unlock(&sbi->inode_hash_lock); +#endif + return i_pos; +} + static int fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; @@ -564,9 +565,12 @@ static int fat_write_inode(struct inode *inode, int wait) loff_t i_pos; int err; + if (inode->i_ino == MSDOS_ROOT_INO) + return 0; + retry: - i_pos = MSDOS_I(inode)->i_pos; - if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) + i_pos = fat_i_pos_read(sbi, inode); + if (!i_pos) return 0; bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); @@ -588,19 +592,17 @@ retry: raw_entry->size = 0; else raw_entry->size = cpu_to_le32(inode->i_size); - raw_entry->attr = fat_attr(inode); + raw_entry->attr = fat_make_attrs(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); - fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, - &raw_entry->date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, + &raw_entry->date, NULL); if (sbi->options.isvfat) { __le16 atime; - fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime, - &raw_entry->cdate, sbi->options.tz_utc); - fat_date_unix2dos(inode->i_atime.tv_sec, &atime, - &raw_entry->adate, sbi->options.tz_utc); - raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + - inode->i_ctime.tv_nsec / 10000000; + fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime, + &raw_entry->cdate, &raw_entry->ctime_cs); + fat_time_unix2fat(sbi, &inode->i_atime, &atime, + &raw_entry->adate, NULL); } spin_unlock(&sbi->inode_hash_lock); mark_buffer_dirty(bh); @@ -819,8 +821,10 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",uni_xlate"); if (!opts->numtail) seq_puts(m, ",nonumtail"); + if (opts->rodir) + seq_puts(m, ",rodir"); } - if (sbi->options.flush) + if (opts->flush) seq_puts(m, ",flush"); if (opts->tz_utc) seq_puts(m, ",tz=UTC"); @@ -836,7 +840,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, + Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err, }; static const match_table_t fat_tokens = { @@ -908,6 +912,7 @@ static const match_table_t vfat_tokens = { {Opt_nonumtail_yes, "nonumtail=yes"}, {Opt_nonumtail_yes, "nonumtail=true"}, {Opt_nonumtail_yes, "nonumtail"}, + {Opt_rodir, "rodir"}, {Opt_err, NULL} }; @@ -927,10 +932,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->allow_utime = -1; opts->codepage = fat_default_codepage; opts->iocharset = fat_default_iocharset; - if (is_vfat) + if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95; - else + opts->rodir = 0; + } else { opts->shortname = 0; + opts->rodir = 1; + } opts->name_check = 'n'; opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; opts->utf8 = opts->unicode_xlate = 0; @@ -1081,6 +1089,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, case Opt_nonumtail_yes: /* empty or 1 or yes or true */ opts->numtail = 0; /* negated option */ break; + case Opt_rodir: + opts->rodir = 1; + break; /* obsolete mount options */ case Opt_obsolate: @@ -1126,7 +1137,7 @@ static int fat_read_root(struct inode *inode) inode->i_gid = sbi->options.fs_gid; inode->i_version++; inode->i_generation = 0; - inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; if (sbi->fat_bits == 32) { @@ -1143,7 +1154,7 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->i_logstart = 0; MSDOS_I(inode)->mmu_private = inode->i_size; - MSDOS_I(inode)->i_attrs = ATTR_NONE; + fat_save_attrs(inode, ATTR_DIR); inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; inode->i_nlink = fat_subdirs(inode)+2; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 79fb98ad36d4..ac39ebcc1496 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -8,8 +8,8 @@ #include <linux/module.h> #include <linux/fs.h> -#include <linux/msdos_fs.h> #include <linux/buffer_head.h> +#include "fat.h" /* * fat_fs_panic reports a severe file system problem and sets the file system @@ -124,8 +124,9 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) mark_inode_dirty(inode); } if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { - fat_fs_panic(sb, "clusters badly computed (%d != %lu)", - new_fclus, inode->i_blocks >> (sbi->cluster_bits - 9)); + fat_fs_panic(sb, "clusters badly computed (%d != %llu)", + new_fclus, + (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); fat_cache_inval_inode(inode); } inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9); @@ -135,65 +136,131 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) extern struct timezone sys_tz; +/* + * The epoch of FAT timestamp is 1980. + * : bits : value + * date: 0 - 4: day (1 - 31) + * date: 5 - 8: month (1 - 12) + * date: 9 - 15: year (0 - 127) from 1980 + * time: 0 - 4: sec (0 - 29) 2sec counts + * time: 5 - 10: min (0 - 59) + * time: 11 - 15: hour (0 - 23) + */ +#define SECS_PER_MIN 60 +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) +#define UNIX_SECS_1980 315532800L +#if BITS_PER_LONG == 64 +#define UNIX_SECS_2108 4354819200L +#endif +/* days between 1.1.70 and 1.1.80 (2 leap days) */ +#define DAYS_DELTA (365 * 10 + 2) +/* 120 (2100 - 1980) isn't leap year */ +#define YEAR_2100 120 +#define IS_LEAP_YEAR(y) (!((y) & 3) && (y) != YEAR_2100) + /* Linear day numbers of the respective 1sts in non-leap years. */ -static int day_n[] = { - /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ - 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0 +static time_t days_in_year[] = { + /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ + 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, }; -/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ -int date_dos2unix(unsigned short time, unsigned short date, int tz_utc) +/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */ +void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 __time, __le16 __date, u8 time_cs) { - int month, year, secs; + u16 time = le16_to_cpu(__time), date = le16_to_cpu(__date); + time_t second, day, leap_day, month, year; - /* - * first subtract and mask after that... Otherwise, if - * date == 0, bad things happen - */ - month = ((date >> 5) - 1) & 15; - year = date >> 9; - secs = (time & 31)*2+60*((time >> 5) & 63)+(time >> 11)*3600+86400* - ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && - month < 2 ? 1 : 0)+3653); - /* days since 1.1.70 plus 80's leap day */ - if (!tz_utc) - secs += sys_tz.tz_minuteswest*60; - return secs; + year = date >> 9; + month = max(1, (date >> 5) & 0xf); + day = max(1, date & 0x1f) - 1; + + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + if (IS_LEAP_YEAR(year) && month > 2) + leap_day++; + + second = (time & 0x1f) << 1; + second += ((time >> 5) & 0x3f) * SECS_PER_MIN; + second += (time >> 11) * SECS_PER_HOUR; + second += (year * 365 + leap_day + + days_in_year[month] + day + + DAYS_DELTA) * SECS_PER_DAY; + + if (!sbi->options.tz_utc) + second += sys_tz.tz_minuteswest * SECS_PER_MIN; + + if (time_cs) { + ts->tv_sec = second + (time_cs / 100); + ts->tv_nsec = (time_cs % 100) * 10000000; + } else { + ts->tv_sec = second; + ts->tv_nsec = 0; + } } -/* Convert linear UNIX date to a MS-DOS time/date pair. */ -void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc) +/* Convert linear UNIX date to a FAT time/date pair. */ +void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 *time, __le16 *date, u8 *time_cs) { - int day, year, nl_day, month; + time_t second = ts->tv_sec; + time_t day, leap_day, month, year; - if (!tz_utc) - unix_date -= sys_tz.tz_minuteswest*60; + if (!sbi->options.tz_utc) + second -= sys_tz.tz_minuteswest * SECS_PER_MIN; /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ - if (unix_date < 315532800) - unix_date = 315532800; - - *time = cpu_to_le16((unix_date % 60)/2+(((unix_date/60) % 60) << 5)+ - (((unix_date/3600) % 24) << 11)); - day = unix_date/86400-3652; - year = day/365; - if ((year+3)/4+365*year > day) + if (second < UNIX_SECS_1980) { + *time = 0; + *date = cpu_to_le16((0 << 9) | (1 << 5) | 1); + if (time_cs) + *time_cs = 0; + return; + } +#if BITS_PER_LONG == 64 + if (second >= UNIX_SECS_2108) { + *time = cpu_to_le16((23 << 11) | (59 << 5) | 29); + *date = cpu_to_le16((127 << 9) | (12 << 5) | 31); + if (time_cs) + *time_cs = 199; + return; + } +#endif + + day = second / SECS_PER_DAY - DAYS_DELTA; + year = day / 365; + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + if (year * 365 + leap_day > day) year--; - day -= (year+3)/4+365*year; - if (day == 59 && !(year & 3)) { - nl_day = day; + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + day -= year * 365 + leap_day; + + if (IS_LEAP_YEAR(year) && day == days_in_year[3]) { month = 2; } else { - nl_day = (year & 3) || day <= 59 ? day : day-1; - for (month = 0; month < 12; month++) { - if (day_n[month] > nl_day) + if (IS_LEAP_YEAR(year) && day > days_in_year[3]) + day--; + for (month = 1; month < 12; month++) { + if (days_in_year[month + 1] > day) break; } } - *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); -} + day -= days_in_year[month]; -EXPORT_SYMBOL_GPL(fat_date_unix2dos); + *time = cpu_to_le16(((second / SECS_PER_HOUR) % 24) << 11 + | ((second / SECS_PER_MIN) % 60) << 5 + | (second % SECS_PER_MIN) >> 1); + *date = cpu_to_le16((year << 9) | (month << 5) | (day + 1)); + if (time_cs) + *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000; +} +EXPORT_SYMBOL_GPL(fat_time_unix2fat); int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { diff --git a/fs/msdos/namei.c b/fs/fat/namei_msdos.c index e844b9809d27..7ba03a4acbe0 100644 --- a/fs/msdos/namei.c +++ b/fs/fat/namei_msdos.c @@ -9,8 +9,8 @@ #include <linux/module.h> #include <linux/time.h> #include <linux/buffer_head.h> -#include <linux/msdos_fs.h> #include <linux/smp_lock.h> +#include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ static unsigned char bad_chars[] = "*?<>|\""; @@ -203,33 +203,37 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; - struct inode *inode = NULL; - int res; - - dentry->d_op = &msdos_dentry_operations; + struct inode *inode; + int err; lock_super(sb); - res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); - if (res == -ENOENT) - goto add; - if (res < 0) - goto out; + + err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); + if (err) { + if (err == -ENOENT) { + inode = NULL; + goto out; + } + goto error; + } + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { - res = PTR_ERR(inode); - goto out; + err = PTR_ERR(inode); + goto error; } -add: - res = 0; +out: + unlock_super(sb); + dentry->d_op = &msdos_dentry_operations; dentry = d_splice_alias(inode, dentry); if (dentry) dentry->d_op = &msdos_dentry_operations; -out: + return dentry; + +error: unlock_super(sb); - if (!res) - return dentry; - return ERR_PTR(res); + return ERR_PTR(err); } /***** Creates a directory entry (name is already formatted). */ @@ -247,7 +251,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, if (is_hid) de.attr |= ATTR_HIDDEN; de.lcase = 0; - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, NULL); de.cdate = de.adate = 0; de.ctime = 0; de.ctime_cs = 0; diff --git a/fs/vfat/namei.c b/fs/fat/namei_vfat.c index 155c10b4adbd..bf326d4356a3 100644 --- a/fs/vfat/namei.c +++ b/fs/fat/namei_vfat.c @@ -16,36 +16,75 @@ */ #include <linux/module.h> - #include <linux/jiffies.h> -#include <linux/msdos_fs.h> #include <linux/ctype.h> #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/namei.h> +#include "fat.h" -static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +/* + * If new entry was created in the parent, it could create the 8.3 + * alias (the shortname of logname). So, the parent may have the + * negative-dentry which matches the created 8.3 alias. + * + * If it happened, the negative dentry isn't actually negative + * anymore. So, drop it. + */ +static int vfat_revalidate_shortname(struct dentry *dentry) { int ret = 1; - - if (!dentry->d_inode && - nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_CREATE)) - /* - * negative dentry is dropped, in order to make sure - * to use the name which a user desires if this is - * create path. - */ + spin_lock(&dentry->d_lock); + if (dentry->d_time != dentry->d_parent->d_inode->i_version) ret = 0; - else { - spin_lock(&dentry->d_lock); - if (dentry->d_time != dentry->d_parent->d_inode->i_version) - ret = 0; - spin_unlock(&dentry->d_lock); - } + spin_unlock(&dentry->d_lock); return ret; } +static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + /* This is not negative dentry. Always valid. */ + if (dentry->d_inode) + return 1; + return vfat_revalidate_shortname(dentry); +} + +static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) +{ + /* + * This is not negative dentry. Always valid. + * + * Note, rename() to existing directory entry will have ->d_inode, + * and will use existing name which isn't specified name by user. + * + * We may be able to drop this positive dentry here. But dropping + * positive dentry isn't good idea. So it's unsupported like + * rename("filename", "FILENAME") for now. + */ + if (dentry->d_inode) + return 1; + + /* + * This may be nfsd (or something), anyway, we can't see the + * intent of this. So, since this can be for creation, drop it. + */ + if (!nd) + return 0; + + /* + * Drop the negative dentry, in order to make sure to use the + * case sensitive name which is specified by user if this is + * for creation. + */ + if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { + if (nd->flags & LOOKUP_CREATE) + return 0; + } + + return vfat_revalidate_shortname(dentry); +} + /* returns the length of a struct qstr, ignoring trailing dots */ static unsigned int vfat_striptail_len(struct qstr *qstr) { @@ -127,25 +166,16 @@ static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) return 1; } -static struct dentry_operations vfat_dentry_ops[4] = { - { - .d_hash = vfat_hashi, - .d_compare = vfat_cmpi, - }, - { - .d_revalidate = vfat_revalidate, - .d_hash = vfat_hashi, - .d_compare = vfat_cmpi, - }, - { - .d_hash = vfat_hash, - .d_compare = vfat_cmp, - }, - { - .d_revalidate = vfat_revalidate, - .d_hash = vfat_hash, - .d_compare = vfat_cmp, - } +static struct dentry_operations vfat_ci_dentry_ops = { + .d_revalidate = vfat_revalidate_ci, + .d_hash = vfat_hashi, + .d_compare = vfat_cmpi, +}; + +static struct dentry_operations vfat_dentry_ops = { + .d_revalidate = vfat_revalidate, + .d_hash = vfat_hash, + .d_compare = vfat_cmp, }; /* Characters that are undesirable in an MS-DOS file name */ @@ -569,6 +599,7 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name, unsigned char msdos_name[MSDOS_NAME]; wchar_t *uname; __le16 time, date; + u8 time_cs; int err, ulen, usize, i; loff_t offset; @@ -621,10 +652,10 @@ shortname: memcpy(de->name, msdos_name, MSDOS_NAME); de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; de->lcase = lcase; - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, &time_cs); de->time = de->ctime = time; de->date = de->cdate = de->adate = date; - de->ctime_cs = 0; + de->ctime_cs = time_cs; de->start = cpu_to_le16(cluster); de->starthi = cpu_to_le16(cluster >> 16); de->size = 0; @@ -683,46 +714,58 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; - struct inode *inode = NULL; + struct inode *inode; struct dentry *alias; - int err, table; + int err; lock_super(sb); - table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0; - dentry->d_op = &vfat_dentry_ops[table]; err = vfat_find(dir, &dentry->d_name, &sinfo); if (err) { - table++; + if (err == -ENOENT) { + inode = NULL; + goto out; + } goto error; } + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { - unlock_super(sb); - return ERR_CAST(inode); + err = PTR_ERR(inode); + goto error; } - alias = d_find_alias(inode); - if (alias) { - if (d_invalidate(alias) == 0) - dput(alias); - else { - iput(inode); - unlock_super(sb); - return alias; - } + alias = d_find_alias(inode); + if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) { + /* + * This inode has non DCACHE_DISCONNECTED dentry. This + * means, the user did ->lookup() by an another name + * (longname vs 8.3 alias of it) in past. + * + * Switch to new one for reason of locality if possible. + */ + BUG_ON(d_unhashed(alias)); + if (!S_ISDIR(inode->i_mode)) + d_move(alias, dentry); + iput(inode); + unlock_super(sb); + return alias; } -error: +out: unlock_super(sb); - dentry->d_op = &vfat_dentry_ops[table]; + dentry->d_op = sb->s_root->d_op; dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); if (dentry) { - dentry->d_op = &vfat_dentry_ops[table]; + dentry->d_op = sb->s_root->d_op; dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; + +error: + unlock_super(sb); + return ERR_PTR(err); } static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, @@ -1014,9 +1057,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) return res; if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_dentry_ops[0]; + sb->s_root->d_op = &vfat_ci_dentry_ops; else - sb->s_root->d_op = &vfat_dentry_ops[2]; + sb->s_root->d_op = &vfat_dentry_ops; return 0; } diff --git a/fs/file_table.c b/fs/file_table.c index efc06faede6c..5ad0eca6eea2 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -269,6 +269,10 @@ void __fput(struct file *file) eventpoll_release(file); locks_remove_flock(file); + if (unlikely(file->f_flags & FASYNC)) { + if (file->f_op && file->f_op->fasync) + file->f_op->fasync(-1, file, 0); + } if (file->f_op && file->f_op->release) file->f_op->release(inode, file); security_file_free(file); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 87250b6a8682..b72361479be2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1056,7 +1056,6 @@ static int fuse_dev_release(struct inode *inode, struct file *file) end_requests(fc, &fc->pending); end_requests(fc, &fc->processing); spin_unlock(&fc->lock); - fasync_helper(-1, file, 0, &fc->fasync); fuse_conn_put(fc); } diff --git a/fs/inotify_user.c b/fs/inotify_user.c index d85c7d931cdf..d367e9b92862 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -537,9 +537,6 @@ static int inotify_release(struct inode *ignored, struct file *file) inotify_dev_event_dequeue(dev); mutex_unlock(&dev->ev_mutex); - if (file->f_flags & FASYNC) - inotify_fasync(-1, file, 0); - /* free this device: the put matching the get in inotify_init() */ put_inotify_dev(dev); diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 1bd8d4acc6f2..61f32f3868cd 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -115,7 +115,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh) */ void __log_wait_for_space(journal_t *journal) { - int nblocks; + int nblocks, space_left; assert_spin_locked(&journal->j_state_lock); nblocks = jbd_space_needed(journal); @@ -128,25 +128,42 @@ void __log_wait_for_space(journal_t *journal) /* * Test again, another process may have checkpointed while we * were waiting for the checkpoint lock. If there are no - * outstanding transactions there is nothing to checkpoint and - * we can't make progress. Abort the journal in this case. + * transactions ready to be checkpointed, try to recover + * journal space by calling cleanup_journal_tail(), and if + * that doesn't work, by waiting for the currently committing + * transaction to complete. If there is absolutely no way + * to make progress, this is either a BUG or corrupted + * filesystem, so abort the journal and leave a stack + * trace for forensic evidence. */ spin_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); - if (__log_space_left(journal) < nblocks) { + space_left = __log_space_left(journal); + if (space_left < nblocks) { int chkpt = journal->j_checkpoint_transactions != NULL; + tid_t tid = 0; + if (journal->j_committing_transaction) + tid = journal->j_committing_transaction->t_tid; spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_state_lock); if (chkpt) { log_do_checkpoint(journal); + } else if (cleanup_journal_tail(journal) == 0) { + /* We were able to recover space; yay! */ + ; + } else if (tid) { + log_wait_commit(journal, tid); } else { - printk(KERN_ERR "%s: no transactions\n", - __func__); + printk(KERN_ERR "%s: needed %d blocks and " + "only had %d space available\n", + __func__, nblocks, space_left); + printk(KERN_ERR "%s: no way to get more " + "journal space\n", __func__); + WARN_ON(1); journal_abort(journal, 0); } - spin_lock(&journal->j_state_lock); } else { spin_unlock(&journal->j_list_lock); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index d15cd6e7251e..60d4c32c8808 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -860,7 +860,6 @@ out: * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences * @handle: transaction * @bh: buffer to undo - * @credits: store the number of taken credits here (if not NULL) * * Sometimes there is a need to distinguish between metadata which has * been committed to disk and that which has not. The ext3fs code uses diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 9203c3332f17..9497718fe920 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -116,7 +116,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh) */ void __jbd2_log_wait_for_space(journal_t *journal) { - int nblocks; + int nblocks, space_left; assert_spin_locked(&journal->j_state_lock); nblocks = jbd_space_needed(journal); @@ -129,25 +129,43 @@ void __jbd2_log_wait_for_space(journal_t *journal) /* * Test again, another process may have checkpointed while we * were waiting for the checkpoint lock. If there are no - * outstanding transactions there is nothing to checkpoint and - * we can't make progress. Abort the journal in this case. + * transactions ready to be checkpointed, try to recover + * journal space by calling cleanup_journal_tail(), and if + * that doesn't work, by waiting for the currently committing + * transaction to complete. If there is absolutely no way + * to make progress, this is either a BUG or corrupted + * filesystem, so abort the journal and leave a stack + * trace for forensic evidence. */ spin_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); - if (__jbd2_log_space_left(journal) < nblocks) { + space_left = __jbd2_log_space_left(journal); + if (space_left < nblocks) { int chkpt = journal->j_checkpoint_transactions != NULL; + tid_t tid = 0; + if (journal->j_committing_transaction) + tid = journal->j_committing_transaction->t_tid; spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_state_lock); if (chkpt) { jbd2_log_do_checkpoint(journal); + } else if (jbd2_cleanup_journal_tail(journal) == 0) { + /* We were able to recover space; yay! */ + ; + } else if (tid) { + jbd2_log_wait_commit(journal, tid); } else { - printk(KERN_ERR "%s: no transactions\n", - __func__); + printk(KERN_ERR "%s: needed %d blocks and " + "only had %d space available\n", + __func__, nblocks, space_left); + printk(KERN_ERR "%s: no way to get more " + "journal space in %s\n", __func__, + journal->j_devname); + WARN_ON(1); jbd2_journal_abort(journal, 0); } - spin_lock(&journal->j_state_lock); } else { spin_unlock(&journal->j_list_lock); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8b119e16aa36..ebc667bc54a8 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -974,6 +974,9 @@ restart_loop: journal->j_committing_transaction = NULL; spin_unlock(&journal->j_state_lock); + if (journal->j_commit_callback) + journal->j_commit_callback(journal, commit_transaction); + if (commit_transaction->t_checkpoint_list == NULL && commit_transaction->t_checkpoint_io_list == NULL) { __jbd2_journal_drop_transaction(journal, commit_transaction); @@ -995,11 +998,8 @@ restart_loop: } spin_unlock(&journal->j_list_lock); - if (journal->j_commit_callback) - journal->j_commit_callback(journal, commit_transaction); - trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", - journal->j_devname, commit_transaction->t_tid, + journal->j_devname, journal->j_commit_sequence, journal->j_tail_sequence); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 783de118de92..e70d657a19f8 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1089,6 +1089,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", __func__); + jbd2_stats_proc_exit(journal); kfree(journal); return NULL; } @@ -1098,6 +1099,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) if (err) { printk(KERN_ERR "%s: Cannnot locate journal superblock\n", __func__); + jbd2_stats_proc_exit(journal); kfree(journal); return NULL; } diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 8adebd3e43c6..3cceef4ad2b7 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -85,15 +85,15 @@ static int jffs2_garbage_collect_thread(void *_c) for (;;) { allow_signal(SIGHUP); again: + spin_lock(&c->erase_completion_lock); if (!jffs2_thread_should_wake(c)) { set_current_state (TASK_INTERRUPTIBLE); + spin_unlock(&c->erase_completion_lock); D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread sleeping...\n")); - /* Yes, there's a race here; we checked jffs2_thread_should_wake() - before setting current->state to TASK_INTERRUPTIBLE. But it doesn't - matter - We don't care if we miss a wakeup, because the GC thread - is only an optimisation anyway. */ schedule(); - } + } else + spin_unlock(&c->erase_completion_lock); + /* This thread is purely an optimisation. But if it runs when other things could be running, it actually makes things a diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c index 47b045797e42..90cb60d09787 100644 --- a/fs/jffs2/compr_lzo.c +++ b/fs/jffs2/compr_lzo.c @@ -19,7 +19,7 @@ static void *lzo_mem; static void *lzo_compress_buf; -static DEFINE_MUTEX(deflate_mutex); +static DEFINE_MUTEX(deflate_mutex); /* for lzo_mem and lzo_compress_buf */ static void free_workspace(void) { @@ -49,18 +49,21 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out, mutex_lock(&deflate_mutex); ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf, &compress_size, lzo_mem); - mutex_unlock(&deflate_mutex); - if (ret != LZO_E_OK) - return -1; + goto fail; if (compress_size > *dstlen) - return -1; + goto fail; memcpy(cpage_out, lzo_compress_buf, compress_size); - *dstlen = compress_size; + mutex_unlock(&deflate_mutex); + *dstlen = compress_size; return 0; + + fail: + mutex_unlock(&deflate_mutex); + return -1; } static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out, diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 0875b60b4bf7..21a052915aa9 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -261,9 +261,11 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c) jffs2_sum_reset_collected(c->summary); /* reset collected summary */ +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER /* adjust write buffer offset, else we get a non contiguous write bug */ if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len) c->wbuf_ofs = 0xffffffff; +#endif D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); diff --git a/fs/libfs.c b/fs/libfs.c index 74688598bcf7..e960a8321902 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -814,7 +814,7 @@ EXPORT_SYMBOL(simple_getattr); EXPORT_SYMBOL(simple_link); EXPORT_SYMBOL(simple_lookup); EXPORT_SYMBOL(simple_pin_fs); -EXPORT_SYMBOL(simple_prepare_write); +EXPORT_UNUSED_SYMBOL(simple_prepare_write); EXPORT_SYMBOL(simple_readpage); EXPORT_SYMBOL(simple_release_fs); EXPORT_SYMBOL(simple_rename); diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 014f6ce48172..4dfdcbc6bf68 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -434,6 +434,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, * reclaim all locks we hold on this server. */ memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = argp->addr; nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 548b0bb2b84d..3ca89e2a9381 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -466,6 +466,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, * reclaim all locks we hold on this server. */ memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = argp->addr; nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile deleted file mode 100644 index ea67646fcb95..000000000000 --- a/fs/msdos/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux msdos filesystem routines. -# - -obj-$(CONFIG_MSDOS_FS) += msdos.o - -msdos-y := namei.o diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b9195c02a863..d22eb383e1cf 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -5,7 +5,7 @@ * * nfs inode and superblock handling functions * - * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some + * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some * experimental NFS changes. Modularisation taken straight from SYS5 fs. * * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. @@ -908,21 +908,16 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); } -static unsigned long nfs_attr_generation_counter; +static atomic_long_t nfs_attr_generation_counter; static unsigned long nfs_read_attr_generation_counter(void) { - smp_rmb(); - return nfs_attr_generation_counter; + return atomic_long_read(&nfs_attr_generation_counter); } unsigned long nfs_inc_attr_generation_counter(void) { - unsigned long ret; - smp_rmb(); - ret = ++nfs_attr_generation_counter; - smp_wmb(); - return ret; + return atomic_long_inc_return(&nfs_attr_generation_counter); } void nfs_fattr_init(struct nfs_fattr *fattr) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a3b0061dfd45..f48db679a1c6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -5,7 +5,7 @@ * * nfs superblock handling functions * - * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some + * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some * experimental NFS changes. Modularisation taken straight from SYS5 fs. * * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0bc56f6d9276..4433c8f00163 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1875,11 +1875,11 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, return -ENOMEM; offset = *offsetp; - cdp->err = nfserr_eof; /* will be cleared on successful read */ while (1) { unsigned int reclen; + cdp->err = nfserr_eof; /* will be cleared on successful read */ buf.used = 0; buf.full = 0; @@ -1912,8 +1912,6 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, de = (struct buffered_dirent *)((char *)de + reclen); } offset = vfs_llseek(file, 0, SEEK_CUR); - if (!buf.full) - break; } done: diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8d3225a78073..e2570a3bc2b2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -247,8 +247,8 @@ int ocfs2_update_inode_atime(struct inode *inode, mlog_entry_void(); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (handle == NULL) { - ret = -ENOMEM; + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); mlog_errno(ret); goto out; } @@ -312,8 +312,8 @@ static int ocfs2_simple_size_update(struct inode *inode, handle_t *handle = NULL; handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (handle == NULL) { - ret = -ENOMEM; + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); mlog_errno(ret); goto out; } @@ -679,8 +679,7 @@ leave: /* Some parts of this taken from generic_cont_expand, which turned out * to be too fragile to do exactly what we need without us having to - * worry about recursive locking in ->prepare_write() and - * ->commit_write(). */ + * worry about recursive locking in ->write_begin() and ->write_end(). */ static int ocfs2_write_zero_page(struct inode *inode, u64 size) { @@ -1056,8 +1055,8 @@ static int __ocfs2_write_remove_suid(struct inode *inode, (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (handle == NULL) { - ret = -ENOMEM; + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); mlog_errno(ret); goto out; } @@ -1260,8 +1259,8 @@ static int __ocfs2_remove_inode_range(struct inode *inode, } handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); - if (handle == NULL) { - ret = -ENOMEM; + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); mlog_errno(ret); goto out; } @@ -1353,8 +1352,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, goto out; handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (handle == NULL) { - ret = -ENOMEM; + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); mlog_errno(ret); goto out; } @@ -1867,6 +1866,13 @@ relock: written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, ppos, count, ocount); if (written < 0) { + /* + * direct write may have instantiated a few + * blocks outside i_size. Trim these off again. + * Don't need i_size_read because we hold i_mutex. + */ + if (*ppos + count > inode->i_size) + vmtruncate(inode, inode->i_size); ret = written; goto out_dio; } diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 4903688f72a9..7aa00d511874 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1106,6 +1106,12 @@ void ocfs2_clear_inode(struct inode *inode) oi->ip_last_trans = 0; oi->ip_dir_start_lookup = 0; oi->ip_blkno = 0ULL; + + /* + * ip_jinode is used to track txns against this inode. We ensure that + * the journal is flushed before journal shutdown. Thus it is safe to + * have inodes get cleaned up after journal shutdown. + */ jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, &oi->ip_jinode); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 81e40677eecb..99fe9d584f3c 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -690,6 +690,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) /* Shutdown the kernel journal system */ jbd2_journal_destroy(journal->j_journal); + journal->j_journal = NULL; OCFS2_I(inode)->ip_open_count--; diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 3dc18d67557c..eea1d24713ea 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -113,7 +113,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, * ocfs2_write_begin_nolock(). */ if (!PageUptodate(page) || page->mapping != inode->i_mapping) { - ret = -EINVAL; + /* + * the page has been umapped in ocfs2_data_downconvert_worker. + * So return 0 here and let VFS retry. + */ + ret = 0; goto out; } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 485a6aa0ad39..f4967e634ffd 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -378,8 +378,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, } inode = new_inode(dir->i_sb); - if (IS_ERR(inode)) { - status = PTR_ERR(inode); + if (!inode) { + status = -ENOMEM; mlog(ML_ERROR, "new_inode failed!\n"); goto leave; } @@ -491,8 +491,10 @@ leave: brelse(*new_fe_bh); *new_fe_bh = NULL; } - if (inode) + if (inode) { + clear_nlink(inode); iput(inode); + } } mlog_exit(status); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index a21a465490c4..fef7ece32376 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -473,6 +473,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) (____gd)->bg_signature); \ } while (0) +#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ + (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) + static inline unsigned long ino_from_blkno(struct super_block *sb, u64 blkno) { diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index f24ce3d3f956..5f180cf7abbd 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -742,12 +742,12 @@ struct ocfs2_group_desc */ struct ocfs2_xattr_entry { __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */ - __le16 xe_name_offset; /* byte offset from the 1st etnry in the local + __le16 xe_name_offset; /* byte offset from the 1st entry in the local xattr storage(inode, xattr block or xattr bucket). */ __u8 xe_name_len; /* xattr name len, does't include prefix. */ - __u8 xe_type; /* the low 7 bits indicates the name prefix's - * type and the highest 1 bits indicate whether + __u8 xe_type; /* the low 7 bits indicate the name prefix + * type and the highest bit indicates whether * the EA is stored in the local storage. */ __le64 xe_value_size; /* real xattr value length. */ }; @@ -766,9 +766,10 @@ struct ocfs2_xattr_header { xattr. */ __le16 xh_name_value_len; /* total length of name/value length in this bucket. */ - __le16 xh_num_buckets; /* bucket nums in one extent - record, only valid in the - first bucket. */ + __le16 xh_num_buckets; /* Number of xattr buckets + in this extent record, + only valid in the first + bucket. */ __le64 xh_csum; struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ }; @@ -776,8 +777,8 @@ struct ocfs2_xattr_header { /* * On disk structure for xattr value root. * - * It is used when one extended attribute's size is larger, and we will save it - * in an outside cluster. It will stored in a b-tree like file content. + * When an xattr's value is large enough, it is stored in an external + * b-tree like file data. The xattr value root points to this structure. */ struct ocfs2_xattr_value_root { /*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */ diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 802c41492214..054e2efb0b7e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3,25 +3,20 @@ * * xattr.c * - * Copyright (C) 2008 Oracle. All rights reserved. + * Copyright (C) 2004, 2008 Oracle. All rights reserved. * * CREDITS: - * Lots of code in this file is taken from ext3. + * Lots of code in this file is copy from linux/fs/ext3/xattr.c. + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * License version 2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/capability.h> @@ -83,7 +78,7 @@ struct xattr_handler *ocfs2_xattr_handlers[] = { NULL }; -static struct xattr_handler *ocfs2_xattr_handler_map[] = { +static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, }; @@ -116,6 +111,10 @@ static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, int *block_off, int *new_offset); +static int ocfs2_xattr_block_find(struct inode *inode, + int name_index, + const char *name, + struct ocfs2_xattr_search *xs); static int ocfs2_xattr_index_block_find(struct inode *inode, struct buffer_head *root_bh, int name_index, @@ -137,6 +136,24 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode, static int ocfs2_delete_xattr_index_block(struct inode *inode, struct buffer_head *xb_bh); +static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) +{ + return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; +} + +static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) +{ + return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); +} + +static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) +{ + u16 len = sb->s_blocksize - + offsetof(struct ocfs2_xattr_header, xh_entries); + + return len / sizeof(struct ocfs2_xattr_entry); +} + static inline const char *ocfs2_xattr_prefix(int name_index) { struct xattr_handler *handler = NULL; @@ -542,14 +559,12 @@ static int ocfs2_xattr_block_list(struct inode *inode, mlog_errno(ret); return ret; } - /*Verify the signature of xattr block*/ - if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, - strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { - ret = -EFAULT; - goto cleanup; - } xb = (struct ocfs2_xattr_block *)blk_bh->b_data; + if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { + ret = -EIO; + goto cleanup; + } if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; @@ -749,47 +764,25 @@ static int ocfs2_xattr_block_get(struct inode *inode, size_t buffer_size, struct ocfs2_xattr_search *xs) { - struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; - struct buffer_head *blk_bh = NULL; struct ocfs2_xattr_block *xb; struct ocfs2_xattr_value_root *xv; size_t size; int ret = -ENODATA, name_offset, name_len, block_off, i; - if (!di->i_xattr_loc) - return ret; - memset(&xs->bucket, 0, sizeof(xs->bucket)); - ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); - if (ret < 0) { + ret = ocfs2_xattr_block_find(inode, name_index, name, xs); + if (ret) { mlog_errno(ret); - return ret; - } - /*Verify the signature of xattr block*/ - if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, - strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { - ret = -EFAULT; goto cleanup; } - xs->xattr_bh = blk_bh; - xb = (struct ocfs2_xattr_block *)blk_bh->b_data; - - if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { - xs->header = &xb->xb_attrs.xb_header; - xs->base = (void *)xs->header; - xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; - xs->here = xs->header->xh_entries; - - ret = ocfs2_xattr_find_entry(name_index, name, xs); - } else - ret = ocfs2_xattr_index_block_find(inode, blk_bh, - name_index, - name, xs); - - if (ret) + if (xs->not_found) { + ret = -ENODATA; goto cleanup; + } + + xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; size = le64_to_cpu(xs->here->xe_value_size); if (buffer) { ret = -ERANGE; @@ -828,7 +821,8 @@ cleanup: brelse(xs->bucket.bhs[i]); memset(&xs->bucket, 0, sizeof(xs->bucket)); - brelse(blk_bh); + brelse(xs->xattr_bh); + xs->xattr_bh = NULL; return ret; } @@ -837,11 +831,11 @@ cleanup: * Copy an extended attribute into the buffer provided. * Buffer is NULL to compute the size of buffer required. */ -int ocfs2_xattr_get(struct inode *inode, - int name_index, - const char *name, - void *buffer, - size_t buffer_size) +static int ocfs2_xattr_get(struct inode *inode, + int name_index, + const char *name, + void *buffer, + size_t buffer_size) { int ret; struct ocfs2_dinode *di = NULL; @@ -871,7 +865,7 @@ int ocfs2_xattr_get(struct inode *inode, down_read(&oi->ip_xattr_sem); ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, buffer_size, &xis); - if (ret == -ENODATA) + if (ret == -ENODATA && di->i_xattr_loc) ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, buffer_size, &xbs); up_read(&oi->ip_xattr_sem); @@ -1229,7 +1223,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, free = min_offs - ((void *)last - xs->base) - sizeof(__u32); if (free < 0) - return -EFAULT; + return -EIO; if (!xs->not_found) { size_t size = 0; @@ -1514,10 +1508,9 @@ static int ocfs2_xattr_free_block(struct inode *inode, goto out; } - /*Verify the signature of xattr block*/ - if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, - strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { - ret = -EFAULT; + xb = (struct ocfs2_xattr_block *)blk_bh->b_data; + if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { + ret = -EIO; goto out; } @@ -1527,7 +1520,6 @@ static int ocfs2_xattr_free_block(struct inode *inode, goto out; } - xb = (struct ocfs2_xattr_block *)blk_bh->b_data; blk = le64_to_cpu(xb->xb_blkno); bit = le16_to_cpu(xb->xb_suballoc_bit); bg_blkno = ocfs2_which_suballoc_group(blk, bit); @@ -1771,15 +1763,14 @@ static int ocfs2_xattr_block_find(struct inode *inode, mlog_errno(ret); return ret; } - /*Verify the signature of xattr block*/ - if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, - strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { - ret = -EFAULT; - goto cleanup; + + xb = (struct ocfs2_xattr_block *)blk_bh->b_data; + if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { + ret = -EIO; + goto cleanup; } xs->xattr_bh = blk_bh; - xb = (struct ocfs2_xattr_block *)blk_bh->b_data; if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { xs->header = &xb->xb_attrs.xb_header; @@ -1806,52 +1797,6 @@ cleanup: } /* - * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree - * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header - * re-initialized. - */ -static int ocfs2_restore_xattr_block(struct inode *inode, - struct ocfs2_xattr_search *xs) -{ - int ret; - handle_t *handle; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct ocfs2_xattr_block *xb = - (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; - struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; - u16 xb_flags = le16_to_cpu(xb->xb_flags); - - BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) || - le16_to_cpu(el->l_next_free_rec) != 0); - - handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; - goto out; - } - - ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret < 0) { - mlog_errno(ret); - goto out_commit; - } - - memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - - offsetof(struct ocfs2_xattr_block, xb_attrs)); - - xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED); - - ocfs2_journal_dirty(handle, xs->xattr_bh); - -out_commit: - ocfs2_commit_trans(osb, handle); -out: - return ret; -} - -/* * ocfs2_xattr_block_set() * * Set, replace or remove an extended attribute into external block. @@ -1961,8 +1906,6 @@ out: } ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs); - if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0) - ret = ocfs2_restore_xattr_block(inode, xs); end: @@ -2398,7 +2341,8 @@ static int ocfs2_xattr_index_block_find(struct inode *inode, BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); mlog(0, "find xattr extent rec %u clusters from %llu, the first hash " - "in the rec is %u\n", num_clusters, p_blkno, first_hash); + "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno, + first_hash); ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, p_blkno, first_hash, num_clusters, xs); @@ -2422,7 +2366,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, memset(&bucket, 0, sizeof(bucket)); mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", - clusters, blkno); + clusters, (unsigned long long)blkno); for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, @@ -2440,7 +2384,8 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, if (i == 0) num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); - mlog(0, "iterating xattr bucket %llu, first hash %u\n", blkno, + mlog(0, "iterating xattr bucket %llu, first hash %u\n", + (unsigned long long)blkno, le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash)); if (func) { ret = func(inode, &bucket, para); @@ -2776,7 +2721,8 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, */ blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); - mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno); + mlog(0, "allocate 1 cluster from %llu to xattr block\n", + (unsigned long long)blkno); xh_bh = sb_getblk(inode->i_sb, blkno); if (!xh_bh) { @@ -2818,7 +2764,11 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, if (data_bh) ocfs2_journal_dirty(handle, data_bh); - ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); + ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); + if (ret) { + mlog_errno(ret); + goto out_commit; + } /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - @@ -2941,8 +2891,8 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, mlog(0, "adjust xattr bucket in %llu, count = %u, " "xh_free_start = %u, xh_name_value_len = %u.\n", - blkno, le16_to_cpu(xh->xh_count), xh_free_start, - le16_to_cpu(xh->xh_name_value_len)); + (unsigned long long)blkno, le16_to_cpu(xh->xh_count), + xh_free_start, le16_to_cpu(xh->xh_name_value_len)); /* * sort all the entries by their offset. @@ -3058,7 +3008,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, prev_blkno += (num_clusters - 1) * bpc + bpc / 2; mlog(0, "move half of xattrs in cluster %llu to %llu\n", - prev_blkno, new_blkno); + (unsigned long long)prev_blkno, (unsigned long long)new_blkno); /* * We need to update the 1st half of the new cluster and @@ -3168,26 +3118,74 @@ static int ocfs2_read_xattr_bucket(struct inode *inode, } /* - * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk). + * Find the suitable pos when we divide a bucket into 2. + * We have to make sure the xattrs with the same hash value exist + * in the same bucket. + * + * If this ocfs2_xattr_header covers more than one hash value, find a + * place where the hash value changes. Try to find the most even split. + * The most common case is that all entries have different hash values, + * and the first check we make will find a place to split. + */ +static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) +{ + struct ocfs2_xattr_entry *entries = xh->xh_entries; + int count = le16_to_cpu(xh->xh_count); + int delta, middle = count / 2; + + /* + * We start at the middle. Each step gets farther away in both + * directions. We therefore hit the change in hash value + * nearest to the middle. Note that this loop does not execute for + * count < 2. + */ + for (delta = 0; delta < middle; delta++) { + /* Let's check delta earlier than middle */ + if (cmp_xe(&entries[middle - delta - 1], + &entries[middle - delta])) + return middle - delta; + + /* For even counts, don't walk off the end */ + if ((middle + delta + 1) == count) + continue; + + /* Now try delta past middle */ + if (cmp_xe(&entries[middle + delta], + &entries[middle + delta + 1])) + return middle + delta + 1; + } + + /* Every entry had the same hash */ + return count; +} + +/* + * Move some xattrs in old bucket(blk) to new bucket(new_blk). * first_hash will record the 1st hash of the new bucket. + * + * Normally half of the xattrs will be moved. But we have to make + * sure that the xattrs with the same hash value are stored in the + * same bucket. If all the xattrs in this bucket have the same hash + * value, the new bucket will be initialized as an empty one and the + * first_hash will be initialized as (hash_value+1). */ -static int ocfs2_half_xattr_bucket(struct inode *inode, - handle_t *handle, - u64 blk, - u64 new_blk, - u32 *first_hash, - int new_bucket_head) +static int ocfs2_divide_xattr_bucket(struct inode *inode, + handle_t *handle, + u64 blk, + u64 new_blk, + u32 *first_hash, + int new_bucket_head) { int ret, i; - u16 count, start, len, name_value_len, xe_len, name_offset; + int count, start, len, name_value_len = 0, xe_len, name_offset = 0; u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); struct buffer_head **s_bhs, **t_bhs = NULL; struct ocfs2_xattr_header *xh; struct ocfs2_xattr_entry *xe; int blocksize = inode->i_sb->s_blocksize; - mlog(0, "move half of xattrs from bucket %llu to %llu\n", - blk, new_blk); + mlog(0, "move some of xattrs from bucket %llu to %llu\n", + (unsigned long long)blk, (unsigned long long)new_blk); s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); if (!s_bhs) @@ -3220,21 +3218,44 @@ static int ocfs2_half_xattr_bucket(struct inode *inode, for (i = 0; i < blk_per_bucket; i++) { ret = ocfs2_journal_access(handle, inode, t_bhs[i], - OCFS2_JOURNAL_ACCESS_CREATE); + new_bucket_head ? + OCFS2_JOURNAL_ACCESS_CREATE : + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); goto out; } } + xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; + count = le16_to_cpu(xh->xh_count); + start = ocfs2_xattr_find_divide_pos(xh); + + if (start == count) { + xe = &xh->xh_entries[start-1]; + + /* + * initialized a new empty bucket here. + * The hash value is set as one larger than + * that of the last entry in the previous bucket. + */ + for (i = 0; i < blk_per_bucket; i++) + memset(t_bhs[i]->b_data, 0, blocksize); + + xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; + xh->xh_free_start = cpu_to_le16(blocksize); + xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; + le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); + + goto set_num_buckets; + } + /* copy the whole bucket to the new first. */ for (i = 0; i < blk_per_bucket; i++) memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); /* update the new bucket. */ xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; - count = le16_to_cpu(xh->xh_count); - start = count / 2; /* * Calculate the total name/value len and xh_free_start for @@ -3291,6 +3312,7 @@ static int ocfs2_half_xattr_bucket(struct inode *inode, xh->xh_free_start = xe->xe_name_offset; } +set_num_buckets: /* set xh->xh_num_buckets for the new xh. */ if (new_bucket_head) xh->xh_num_buckets = cpu_to_le16(1); @@ -3308,9 +3330,13 @@ static int ocfs2_half_xattr_bucket(struct inode *inode, *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); /* - * Now only update the 1st block of the old bucket. - * Please note that the entry has been sorted already above. + * Now only update the 1st block of the old bucket. If we + * just added a new empty bucket, there is no need to modify + * it. */ + if (start == count) + goto out; + xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; memset(&xh->xh_entries[start], 0, sizeof(struct ocfs2_xattr_entry) * (count - start)); @@ -3358,7 +3384,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, BUG_ON(s_blkno == t_blkno); mlog(0, "cp bucket %llu to %llu, target is %d\n", - s_blkno, t_blkno, t_is_new); + (unsigned long long)s_blkno, (unsigned long long)t_blkno, + t_is_new); s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, GFP_NOFS); @@ -3382,6 +3409,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, for (i = 0; i < blk_per_bucket; i++) { ret = ocfs2_journal_access(handle, inode, t_bhs[i], + t_is_new ? + OCFS2_JOURNAL_ACCESS_CREATE : OCFS2_JOURNAL_ACCESS_WRITE); if (ret) goto out; @@ -3428,7 +3457,8 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, struct ocfs2_xattr_header *xh; u64 to_blk_start = to_blk; - mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk); + mlog(0, "cp xattrs from cluster %llu to %llu\n", + (unsigned long long)src_blk, (unsigned long long)to_blk); /* * We need to update the new cluster and 1 more for the update of @@ -3493,15 +3523,15 @@ out: } /* - * Move half of the xattrs in this cluster to the new cluster. + * Move some xattrs in this cluster to the new cluster. * This function should only be called when bucket size == cluster size. * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. */ -static int ocfs2_half_xattr_cluster(struct inode *inode, - handle_t *handle, - u64 prev_blk, - u64 new_blk, - u32 *first_hash) +static int ocfs2_divide_xattr_cluster(struct inode *inode, + handle_t *handle, + u64 prev_blk, + u64 new_blk, + u32 *first_hash) { u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int ret, credits = 2 * blk_per_bucket; @@ -3515,8 +3545,8 @@ static int ocfs2_half_xattr_cluster(struct inode *inode, } /* Move half of the xattr in start_blk to the next bucket. */ - return ocfs2_half_xattr_bucket(inode, handle, prev_blk, - new_blk, first_hash, 1); + return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, + new_blk, first_hash, 1); } /* @@ -3559,7 +3589,8 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", - prev_blk, prev_clusters, new_blk); + (unsigned long long)prev_blk, prev_clusters, + (unsigned long long)new_blk); if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, @@ -3578,9 +3609,9 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, last_blk, new_blk, v_start); else { - ret = ocfs2_half_xattr_cluster(inode, handle, - last_blk, new_blk, - v_start); + ret = ocfs2_divide_xattr_cluster(inode, handle, + last_blk, new_blk, + v_start); if ((*header_bh)->b_blocknr == last_blk && extend) *extend = 0; @@ -3629,7 +3660,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " "previous xattr blkno = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, - prev_cpos, prev_blkno); + prev_cpos, (unsigned long long)prev_blkno); ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); @@ -3716,7 +3747,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, } } mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", - num_bits, block, v_start); + num_bits, (unsigned long long)block, v_start); ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, num_bits, 0, meta_ac); if (ret < 0) { @@ -3761,7 +3792,7 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, u16 bucket = le16_to_cpu(first_xh->xh_num_buckets); mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " - "from %llu, len = %u\n", start_blk, + "from %llu, len = %u\n", (unsigned long long)start_blk, (unsigned long long)first_bh->b_blocknr, num_clusters); BUG_ON(bucket >= num_buckets); @@ -3797,8 +3828,8 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, } /* Move half of the xattr in start_blk to the next bucket. */ - ret = ocfs2_half_xattr_bucket(inode, handle, start_blk, - start_blk + blk_per_bucket, NULL, 0); + ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk, + start_blk + blk_per_bucket, NULL, 0); le16_add_cpu(&first_xh->xh_num_buckets, 1); ocfs2_journal_dirty(handle, first_bh); @@ -4146,7 +4177,7 @@ static int ocfs2_xattr_value_update_size(struct inode *inode, handle_t *handle = NULL; handle = ocfs2_start_trans(osb, 1); - if (handle == NULL) { + if (IS_ERR(handle)) { ret = -ENOMEM; mlog_errno(ret); goto out; @@ -4313,7 +4344,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode, } handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); - if (handle == NULL) { + if (IS_ERR(handle)) { ret = -ENOMEM; mlog_errno(ret); goto out; @@ -4489,11 +4520,21 @@ out: return ret; } -/* check whether the xattr bucket is filled up with the same hash value. */ +/* + * check whether the xattr bucket is filled up with the same hash value. + * If we want to insert the xattr with the same hash, return -ENOSPC. + * If we want to insert a xattr with different hash value, go ahead + * and ocfs2_divide_xattr_bucket will handle this. + */ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, - struct ocfs2_xattr_bucket *bucket) + struct ocfs2_xattr_bucket *bucket, + const char *name) { struct ocfs2_xattr_header *xh = bucket->xh; + u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); + + if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) + return 0; if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == xh->xh_entries[0].xe_name_hash) { @@ -4616,7 +4657,9 @@ try_again: * one bucket's worth, so check it here whether we need to * add a new bucket for the insert. */ - ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket); + ret = ocfs2_check_xattr_bucket_collision(inode, + &xs->bucket, + xi->name); if (ret) { mlog_errno(ret); goto out; @@ -4727,14 +4770,11 @@ out: /* * 'trusted' attributes support */ - -#define XATTR_TRUSTED_PREFIX "trusted." - static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX) - 1; + const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (list && total_len <= list_size) { @@ -4771,18 +4811,14 @@ struct xattr_handler ocfs2_xattr_trusted_handler = { .set = ocfs2_xattr_trusted_set, }; - /* * 'user' attributes support */ - -#define XATTR_USER_PREFIX "user." - static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t prefix_len = sizeof(XATTR_USER_PREFIX) - 1; + const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index c25c7c62a059..1d8314c7656d 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -3,24 +3,16 @@ * * xattr.h * - * Function prototypes - * - * Copyright (C) 2008 Oracle. All rights reserved. + * Copyright (C) 2004, 2008 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * License version 2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_XATTR_H @@ -40,29 +32,11 @@ enum ocfs2_xattr_type { extern struct xattr_handler ocfs2_xattr_user_handler; extern struct xattr_handler ocfs2_xattr_trusted_handler; - -extern ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); -extern int ocfs2_xattr_get(struct inode *, int, const char *, void *, size_t); -extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *, - size_t, int); -extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh); extern struct xattr_handler *ocfs2_xattr_handlers[]; -static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) -{ - return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; -} - -static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) -{ - return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); -} - -static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) -{ - u16 len = sb->s_blocksize - - offsetof(struct ocfs2_xattr_header, xh_entries); +ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); +int ocfs2_xattr_set(struct inode *, int, const char *, const void *, + size_t, int); +int ocfs2_xattr_remove(struct inode *, struct buffer_head *); - return len / sizeof(struct ocfs2_xattr_entry); -} #endif /* OCFS2_XATTR_H */ diff --git a/fs/pipe.c b/fs/pipe.c index fcba6542b8d0..7aea8b89baac 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -717,14 +717,12 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on) static int pipe_read_release(struct inode *inode, struct file *filp) { - pipe_read_fasync(-1, filp, 0); return pipe_release(inode, 1, 0); } static int pipe_write_release(struct inode *inode, struct file *filp) { - pipe_write_fasync(-1, filp, 0); return pipe_release(inode, 0, 1); } @@ -733,7 +731,6 @@ pipe_rdwr_release(struct inode *inode, struct file *filp) { int decr, decw; - pipe_rdwr_fasync(-1, filp, 0); decr = (filp->f_mode & FMODE_READ) != 0; decw = (filp->f_mode & FMODE_WRITE) != 0; return pipe_release(inode, decr, decw); diff --git a/fs/proc/array.c b/fs/proc/array.c index bb9f4b05703d..6af7fba7abb1 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -40,7 +40,7 @@ * * * Alan Cox : security fixes. - * <Alan.Cox@linux.org> + * <alan@lxorguk.ukuu.org.uk> * * Al Viro : safe handling of mm_struct * diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 0c10a0b3f146..df26aa88fa47 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -1,43 +1,45 @@ -#include <linux/fs.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/sched.h> -#include <linux/seq_file.h> #include <linux/time.h> #include <asm/cputime.h> -static int uptime_proc_show(struct seq_file *m, void *v) +static int proc_calc_metrics(char *page, char **start, off_t off, + int count, int *eof, int len) +{ + if (len <= off + count) + *eof = 1; + *start = page + off; + len -= off; + if (len > count) + len = count; + if (len < 0) + len = 0; + return len; +} + +static int uptime_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct timespec uptime; struct timespec idle; + int len; cputime_t idletime = cputime_add(init_task.utime, init_task.stime); do_posix_clock_monotonic_gettime(&uptime); monotonic_to_bootbased(&uptime); cputime_to_timespec(idletime, &idle); - seq_printf(m, "%lu.%02lu %lu.%02lu\n", + len = sprintf(page, "%lu.%02lu %lu.%02lu\n", (unsigned long) uptime.tv_sec, (uptime.tv_nsec / (NSEC_PER_SEC / 100)), (unsigned long) idle.tv_sec, (idle.tv_nsec / (NSEC_PER_SEC / 100))); - return 0; + return proc_calc_metrics(page, start, off, count, eof, len); } -static int uptime_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, uptime_proc_show, NULL); -} - -static const struct file_operations uptime_proc_fops = { - .open = uptime_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_uptime_init(void) { - proc_create("uptime", 0, NULL, &uptime_proc_fops); + create_proc_read_entry("uptime", 0, NULL, uptime_read_proc, NULL); return 0; } module_init(proc_uptime_init); diff --git a/fs/splice.c b/fs/splice.c index a1e701c27156..1abab5cee4ba 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, }; /* - * The actor worker might be calling ->prepare_write and - * ->commit_write. Most of the time, these expect i_mutex to + * The actor worker might be calling ->write_begin and + * ->write_end. Most of the time, these expect i_mutex to * be held. Since this may result in an ABBA deadlock with * pipe->inode, we have to order lock acquiry here. */ diff --git a/fs/vfat/Makefile b/fs/vfat/Makefile deleted file mode 100644 index 40f2798a4f08..000000000000 --- a/fs/vfat/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the linux vfat-filesystem routines. -# - -obj-$(CONFIG_VFAT_FS) += vfat.o - -vfat-y := namei.o diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 9e561a9cefca..a11a8390bf6c 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1566,11 +1566,14 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) int nmap, error, w, count, c, got, i, mapi; xfs_trans_t *tp; xfs_mount_t *mp; + xfs_drfsbno_t nblks; dp = args->dp; mp = dp->i_mount; w = args->whichfork; tp = args->trans; + nblks = dp->i_d.di_nblocks; + /* * For new directories adjust the file offset and block count. */ @@ -1647,6 +1650,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) } if (mapp != &map) kmem_free(mapp); + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *new_blkno = (xfs_dablk_t)bno; return 0; } diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 80e0dc51361c..1afb12278b8d 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -525,11 +525,13 @@ xfs_dir2_grow_inode( xfs_mount_t *mp; int nmap; /* number of bmap entries */ xfs_trans_t *tp; + xfs_drfsbno_t nblks; xfs_dir2_trace_args_s("grow_inode", args, space); dp = args->dp; tp = args->trans; mp = dp->i_mount; + nblks = dp->i_d.di_nblocks; /* * Set lowest possible block in the space requested. */ @@ -622,7 +624,11 @@ xfs_dir2_grow_inode( */ if (mapp != &map) kmem_free(mapp); + + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); + /* * Update file's size if this is the data space and it grew. */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index dbd9cef852ec..a391b955df01 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1414,7 +1414,7 @@ xfs_itruncate_start( mp = ip->i_mount; /* wait for the completion of any pending DIOs */ - if (new_size < ip->i_size) + if (new_size == 0 || new_size < ip->i_size) vn_iowait(ip); /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0b02c6443551..3608a0f0a5f6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -563,6 +563,11 @@ xfs_log_mount( } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); + if (!mp->m_log) { + cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); + error = ENOMEM; + goto out; + } /* * Initialize the AIL now we have a log. @@ -601,6 +606,7 @@ xfs_log_mount( return 0; error: xfs_log_unmount_dealloc(mp); +out: return error; } /* xfs_log_mount */ @@ -1217,7 +1223,9 @@ xlog_alloc_log(xfs_mount_t *mp, int i; int iclogsize; - log = (xlog_t *)kmem_zalloc(sizeof(xlog_t), KM_SLEEP); + log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); + if (!log) + return NULL; log->l_mp = mp; log->l_targ = log_target; @@ -1249,6 +1257,8 @@ xlog_alloc_log(xfs_mount_t *mp, xlog_get_iclog_buffer_size(mp, log); bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); + if (!bp) + goto out_free_log; XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); @@ -1275,13 +1285,17 @@ xlog_alloc_log(xfs_mount_t *mp, iclogsize = log->l_iclog_size; ASSERT(log->l_iclog_size >= 4096); for (i=0; i < log->l_iclog_bufs; i++) { - *iclogp = (xlog_in_core_t *) - kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); + *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); + if (!*iclogp) + goto out_free_iclog; + iclog = *iclogp; iclog->ic_prev = prev_iclog; prev_iclog = iclog; bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); + if (!bp) + goto out_free_iclog; if (!XFS_BUF_CPSEMA(bp)) ASSERT(0); XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); @@ -1323,6 +1337,25 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ return log; + +out_free_iclog: + for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { + prev_iclog = iclog->ic_next; + if (iclog->ic_bp) { + sv_destroy(&iclog->ic_force_wait); + sv_destroy(&iclog->ic_write_wait); + xfs_buf_free(iclog->ic_bp); + xlog_trace_iclog_dealloc(iclog); + } + kmem_free(iclog); + } + spinlock_destroy(&log->l_icloglock); + spinlock_destroy(&log->l_grant_lock); + xlog_trace_loggrant_dealloc(log); + xfs_buf_free(log->l_xbuf); +out_free_log: + kmem_free(log); + return NULL; } /* xlog_alloc_log */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 82d46ce69d5f..70e3ba32e6be 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1419,7 +1419,13 @@ xlog_recover_add_to_trans( return 0; item = trans->r_itemq; if (item == NULL) { - ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); + /* we need to catch log corruptions here */ + if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { + xlog_warn("XFS: xlog_recover_add_to_trans: " + "bad header magic number"); + ASSERT(0); + return XFS_ERROR(EIO); + } if (len == sizeof(xfs_trans_header_t)) xlog_recover_add_item(&trans->r_itemq); memcpy(&trans->r_theader, dp, len); /* d, s, l */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index a4503f5e9497..15f5dd22fbb2 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1245,6 +1245,9 @@ xfs_unmountfs( XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); + if (mp->m_quotainfo) + XFS_QM_DONE(mp); + /* * Flush out the log synchronously so that we know for sure * that nothing is pinned. This is important because bflush() @@ -1297,8 +1300,6 @@ xfs_unmountfs( xfs_errortag_clearall(mp, 0); #endif xfs_free_perag(mp); - if (mp->m_quotainfo) - XFS_QM_DONE(mp); } STATIC void |