From f418b006079ce537daf9436215f1d2a47e451602 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Mon, 28 Jul 2008 13:32:38 -0400
Subject: [PATCH 01/12] Re: BUG at security/selinux/avc.c:883 (was: Re:
 linux-next: Tree for July 17: early crash on x86-64)

SELinux needs MAY_APPEND to be passed down to the security hook.
Otherwise, we get permission denials when only append permission is
granted by policy even if the opening process specified O_APPEND.
Shows up as a regression in the ltp selinux testsuite, fixed by
this patch.

Signed-off-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/namei.c b/fs/namei.c
index a7b0a0b801287..b91e9732b24a6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -274,7 +274,7 @@ int inode_permission(struct inode *inode, int mask)
 		return retval;
 
 	return security_inode_permission(inode,
-			mask & (MAY_READ|MAY_WRITE|MAY_EXEC));
+			mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
 }
 
 /**

From a1bc6eb4b499ae67ada9a01660010580b6569403 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 Jul 2008 06:32:52 -0400
Subject: [PATCH 02/12] [PATCH] ipv4_static_sysctl_init() should be under
 CONFIG_SYSCTL

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv4/route.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 380d6474cf661..a72a5ad46ec5b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3216,6 +3216,7 @@ int __init ip_rt_init(void)
 	return rc;
 }
 
+#ifdef CONFIG_SYSCTL
 /*
  * We really need to sanitize the damn ipv4 init order, then all
  * this nonsense will go away.
@@ -3224,6 +3225,7 @@ void __init ip_static_sysctl_init(void)
 {
 	register_sysctl_paths(ipv4_route_path, ipv4_route_table);
 }
+#endif
 
 EXPORT_SYMBOL(__ip_select_ident);
 EXPORT_SYMBOL(ip_route_input);

From 1027abe8827b47f7e9c4ed6514fde3d44f79963c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 Jul 2008 04:13:04 -0400
Subject: [PATCH 03/12] [PATCH] merge locate_fd() and get_unused_fd()

	New primitive: alloc_fd(start, flags).  get_unused_fd() and
get_unused_fd_flags() become wrappers on top of it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fcntl.c           | 87 +++++++-------------------------------------
 fs/file.c            | 61 +++++++++++++++++++++++++++++++
 fs/open.c            | 56 ----------------------------
 include/linux/file.h |  3 +-
 4 files changed, 77 insertions(+), 130 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 61d6251368136..2e40799daad68 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -49,73 +49,6 @@ static int get_close_on_exec(unsigned int fd)
 	return res;
 }
 
-/*
- * locate_fd finds a free file descriptor in the open_fds fdset,
- * expanding the fd arrays if necessary.  Must be called with the
- * file_lock held for write.
- */
-
-static int locate_fd(unsigned int orig_start, int cloexec)
-{
-	struct files_struct *files = current->files;
-	unsigned int newfd;
-	unsigned int start;
-	int error;
-	struct fdtable *fdt;
-
-	spin_lock(&files->file_lock);
-repeat:
-	fdt = files_fdtable(files);
-	/*
-	 * Someone might have closed fd's in the range
-	 * orig_start..fdt->next_fd
-	 */
-	start = orig_start;
-	if (start < files->next_fd)
-		start = files->next_fd;
-
-	newfd = start;
-	if (start < fdt->max_fds)
-		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
-					   fdt->max_fds, start);
-
-	error = expand_files(files, newfd);
-	if (error < 0)
-		goto out;
-
-	/*
-	 * If we needed to expand the fs array we
-	 * might have blocked - try again.
-	 */
-	if (error)
-		goto repeat;
-
-	if (start <= files->next_fd)
-		files->next_fd = newfd + 1;
-
-	FD_SET(newfd, fdt->open_fds);
-	if (cloexec)
-		FD_SET(newfd, fdt->close_on_exec);
-	else
-		FD_CLR(newfd, fdt->close_on_exec);
-	error = newfd;
-
-out:
-	spin_unlock(&files->file_lock);
-	return error;
-}
-
-static int dupfd(struct file *file, unsigned int start, int cloexec)
-{
-	int fd = locate_fd(start, cloexec);
-	if (fd >= 0)
-		fd_install(fd, file);
-	else
-		fput(file);
-
-	return fd;
-}
-
 asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 {
 	int err = -EBADF;
@@ -194,10 +127,15 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 asmlinkage long sys_dup(unsigned int fildes)
 {
 	int ret = -EBADF;
-	struct file * file = fget(fildes);
-
-	if (file)
-		ret = dupfd(file, 0, 0);
+	struct file *file = fget(fildes);
+
+	if (file) {
+		ret = get_unused_fd();
+		if (ret >= 0)
+			fd_install(ret, file);
+		else
+			fput(file);
+	}
 	return ret;
 }
 
@@ -322,8 +260,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_DUPFD_CLOEXEC:
 		if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
 			break;
-		get_file(filp);
-		err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC);
+		err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
+		if (err >= 0) {
+			get_file(filp);
+			fd_install(err, filp);
+		}
 		break;
 	case F_GETFD:
 		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
diff --git a/fs/file.c b/fs/file.c
index d8773b19fe47f..f313314f996fc 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
  *  Manage the dynamic fd arrays in the process files_struct.
  */
 
+#include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/time.h>
@@ -432,3 +433,63 @@ struct files_struct init_files = {
 	},
 	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
 };
+
+/*
+ * allocate a file descriptor, mark it busy.
+ */
+int alloc_fd(unsigned start, unsigned flags)
+{
+	struct files_struct *files = current->files;
+	unsigned int fd;
+	int error;
+	struct fdtable *fdt;
+
+	spin_lock(&files->file_lock);
+repeat:
+	fdt = files_fdtable(files);
+	fd = start;
+	if (fd < files->next_fd)
+		fd = files->next_fd;
+
+	if (fd < fdt->max_fds)
+		fd = find_next_zero_bit(fdt->open_fds->fds_bits,
+					   fdt->max_fds, fd);
+
+	error = expand_files(files, fd);
+	if (error < 0)
+		goto out;
+
+	/*
+	 * If we needed to expand the fs array we
+	 * might have blocked - try again.
+	 */
+	if (error)
+		goto repeat;
+
+	if (start <= files->next_fd)
+		files->next_fd = fd + 1;
+
+	FD_SET(fd, fdt->open_fds);
+	if (flags & O_CLOEXEC)
+		FD_SET(fd, fdt->close_on_exec);
+	else
+		FD_CLR(fd, fdt->close_on_exec);
+	error = fd;
+#if 1
+	/* Sanity check */
+	if (rcu_dereference(fdt->fd[fd]) != NULL) {
+		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
+		rcu_assign_pointer(fdt->fd[fd], NULL);
+	}
+#endif
+
+out:
+	spin_unlock(&files->file_lock);
+	return error;
+}
+
+int get_unused_fd(void)
+{
+	return alloc_fd(0, 0);
+}
+EXPORT_SYMBOL(get_unused_fd);
diff --git a/fs/open.c b/fs/open.c
index 52647be277a2c..07da9359481c7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -963,62 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 }
 EXPORT_SYMBOL(dentry_open);
 
-/*
- * Find an empty file descriptor entry, and mark it busy.
- */
-int get_unused_fd_flags(int flags)
-{
-	struct files_struct * files = current->files;
-	int fd, error;
-	struct fdtable *fdt;
-
-	spin_lock(&files->file_lock);
-
-repeat:
-	fdt = files_fdtable(files);
-	fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
-				files->next_fd);
-
-	/* Do we need to expand the fd array or fd set?  */
-	error = expand_files(files, fd);
-	if (error < 0)
-		goto out;
-
-	if (error) {
-		/*
-	 	 * If we needed to expand the fs array we
-		 * might have blocked - try again.
-		 */
-		goto repeat;
-	}
-
-	FD_SET(fd, fdt->open_fds);
-	if (flags & O_CLOEXEC)
-		FD_SET(fd, fdt->close_on_exec);
-	else
-		FD_CLR(fd, fdt->close_on_exec);
-	files->next_fd = fd + 1;
-#if 1
-	/* Sanity check */
-	if (fdt->fd[fd] != NULL) {
-		printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
-		fdt->fd[fd] = NULL;
-	}
-#endif
-	error = fd;
-
-out:
-	spin_unlock(&files->file_lock);
-	return error;
-}
-
-int get_unused_fd(void)
-{
-	return get_unused_fd_flags(0);
-}
-
-EXPORT_SYMBOL(get_unused_fd);
-
 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
 	struct fdtable *fdt = files_fdtable(files);
diff --git a/include/linux/file.h b/include/linux/file.h
index 27c64bdc68c96..a20259e248a5a 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -34,8 +34,9 @@ extern struct file *fget(unsigned int fd);
 extern struct file *fget_light(unsigned int fd, int *fput_needed);
 extern void set_close_on_exec(unsigned int fd, int flag);
 extern void put_filp(struct file *);
+extern int alloc_fd(unsigned start, unsigned flags);
 extern int get_unused_fd(void);
-extern int get_unused_fd_flags(int flags);
+#define get_unused_fd_flags(flags) alloc_fd(0, (flags))
 extern void put_unused_fd(unsigned int fd);
 
 extern void fd_install(unsigned int fd, struct file *file);

From 1b7e190b4764ea3ca1080404dd593eae5230d2b3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 Jul 2008 06:18:03 -0400
Subject: [PATCH 04/12] [PATCH] clean dup2() up a bit

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fcntl.c | 53 +++++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2e40799daad68..ac4f7db9f1345 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -63,31 +63,35 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 		return -EINVAL;
 
 	spin_lock(&files->file_lock);
-	if (!(file = fcheck(oldfd)))
-		goto out_unlock;
-	get_file(file);			/* We are now finished with oldfd */
-
 	err = expand_files(files, newfd);
+	file = fcheck(oldfd);
+	if (unlikely(!file))
+		goto Ebadf;
 	if (unlikely(err < 0)) {
 		if (err == -EMFILE)
-			err = -EBADF;
-		goto out_fput;
+			goto Ebadf;
+		goto out_unlock;
 	}
-
-	/* To avoid races with open() and dup(), we will mark the fd as
-	 * in-use in the open-file bitmap throughout the entire dup2()
-	 * process.  This is quite safe: do_close() uses the fd array
-	 * entry, not the bitmap, to decide what work needs to be
-	 * done.  --sct */
-	/* Doesn't work. open() might be there first. --AV */
-
-	/* Yes. It's a race. In user space. Nothing sane to do */
+	/*
+	 * We need to detect attempts to do dup2() over allocated but still
+	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
+	 * extra work in their equivalent of fget() - they insert struct
+	 * file immediately after grabbing descriptor, mark it larval if
+	 * more work (e.g. actual opening) is needed and make sure that
+	 * fget() treats larval files as absent.  Potentially interesting,
+	 * but while extra work in fget() is trivial, locking implications
+	 * and amount of surgery on open()-related paths in VFS are not.
+	 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
+	 * deadlocks in rather amusing ways, AFAICS.  All of that is out of
+	 * scope of POSIX or SUS, since neither considers shared descriptor
+	 * tables and this condition does not arise without those.
+	 */
 	err = -EBUSY;
 	fdt = files_fdtable(files);
 	tofree = fdt->fd[newfd];
 	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
-		goto out_fput;
-
+		goto out_unlock;
+	get_file(file);
 	rcu_assign_pointer(fdt->fd[newfd], file);
 	FD_SET(newfd, fdt->open_fds);
 	if (flags & O_CLOEXEC)
@@ -98,17 +102,14 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 
 	if (tofree)
 		filp_close(tofree, files);
-	err = newfd;
-out:
-	return err;
-out_unlock:
-	spin_unlock(&files->file_lock);
-	goto out;
 
-out_fput:
+	return newfd;
+
+Ebadf:
+	err = -EBADF;
+out_unlock:
 	spin_unlock(&files->file_lock);
-	fput(file);
-	goto out;
+	return err;
 }
 
 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)

From 77e69dac3cefacee939cb107ae9cd520a62338e0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 1 Aug 2008 04:29:18 -0400
Subject: [PATCH 05/12] [PATCH] fix races and leaks in vfs_quota_on() users

* new helper: vfs_quota_on_path(); equivalent of vfs_quota_on() sans the
  pathname resolution.
* callers of vfs_quota_on() that do their own pathname resolution and
  checks based on it are switched to vfs_quota_on_path(); that way we
  avoid the races.
* reiserfs leaked dentry/vfsmount references on several failure exits.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dquot.c               | 33 ++++++++++++++++++++-------------
 fs/ext3/super.c          |  3 ++-
 fs/ext4/super.c          |  3 ++-
 fs/reiserfs/super.c      | 16 +++++++++-------
 include/linux/quotaops.h |  2 ++
 5 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/fs/dquot.c b/fs/dquot.c
index 1346eebe74ce9..8ec4d6cc76331 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1793,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
 	return ret;
 }
 
+int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
+		      struct path *path)
+{
+	int error = security_quota_on(path->dentry);
+	if (error)
+		return error;
+	/* Quota file not on the same filesystem? */
+	if (path->mnt->mnt_sb != sb)
+		error = -EXDEV;
+	else
+		error = vfs_quota_on_inode(path->dentry->d_inode, type,
+					   format_id);
+	return error;
+}
+
 /* Actual function called from quotactl() */
 int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
 		 int remount)
@@ -1804,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
 		return vfs_quota_on_remount(sb, type);
 
 	error = path_lookup(path, LOOKUP_FOLLOW, &nd);
-	if (error < 0)
-		return error;
-	error = security_quota_on(nd.path.dentry);
-	if (error)
-		goto out_path;
-	/* Quota file not on the same filesystem? */
-	if (nd.path.mnt->mnt_sb != sb)
-		error = -EXDEV;
-	else
-		error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
-					   format_id);
-out_path:
-	path_put(&nd.path);
+	if (!error) {
+		error = vfs_quota_on_path(sb, type, format_id, &nd.path);
+		path_put(&nd.path);
+	}
 	return error;
 }
 
@@ -2185,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format);
 EXPORT_SYMBOL(dqstats);
 EXPORT_SYMBOL(dq_data_lock);
 EXPORT_SYMBOL(vfs_quota_on);
+EXPORT_SYMBOL(vfs_quota_on_path);
 EXPORT_SYMBOL(vfs_quota_on_mount);
 EXPORT_SYMBOL(vfs_quota_off);
 EXPORT_SYMBOL(vfs_quota_sync);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8ddced3846748..f38a5afc39a16 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2810,8 +2810,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 		journal_unlock_updates(EXT3_SB(sb)->s_journal);
 	}
 
+	err = vfs_quota_on_path(sb, type, format_id, &nd.path);
 	path_put(&nd.path);
-	return vfs_quota_on(sb, type, format_id, path, remount);
+	return err;
 }
 
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b5479b1dff14b..1e69f29a8c558 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3352,8 +3352,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	}
 
+	err = vfs_quota_on_path(sb, type, format_id, &nd.path);
 	path_put(&nd.path);
-	return vfs_quota_on(sb, type, format_id, path, remount);
+	return err;
 }
 
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 879e54d35c2d4..282a13596c702 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2076,8 +2076,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 		return err;
 	/* Quotafile not on the same filesystem? */
 	if (nd.path.mnt->mnt_sb != sb) {
-		path_put(&nd.path);
-		return -EXDEV;
+		err = -EXDEV;
+		goto out;
 	}
 	inode = nd.path.dentry->d_inode;
 	/* We must not pack tails for quota files on reiserfs for quota IO to work */
@@ -2087,8 +2087,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 			reiserfs_warning(sb,
 				"reiserfs: Unpacking tail of quota file failed"
 				" (%d). Cannot turn on quotas.", err);
-			path_put(&nd.path);
-			return -EINVAL;
+			err = -EINVAL;
+			goto out;
 		}
 		mark_inode_dirty(inode);
 	}
@@ -2109,13 +2109,15 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 		/* Just start temporary transaction and finish it */
 		err = journal_begin(&th, sb, 1);
 		if (err)
-			return err;
+			goto out;
 		err = journal_end_sync(&th, sb, 1);
 		if (err)
-			return err;
+			goto out;
 	}
+	err = vfs_quota_on_path(sb, type, format_id, &nd.path);
+out:
 	path_put(&nd.path);
-	return vfs_quota_on(sb, type, format_id, path, 0);
+	return err;
 }
 
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 742187f7a05c5..ca6b9b5c8d527 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -43,6 +43,8 @@ int dquot_mark_dquot_dirty(struct dquot *dquot);
 
 int vfs_quota_on(struct super_block *sb, int type, int format_id,
  	char *path, int remount);
+int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
+ 	struct path *path);
 int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
  	int format_id, int type);
 int vfs_quota_off(struct super_block *sb, int type, int remount);

From 8266602033d6adc6d10cb8811c1fd694767909b0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 1 Aug 2008 05:32:04 -0400
Subject: [PATCH 06/12] [PATCH] fix bdev leak in block_dev.c do_open()

Callers expect it to drop reference to bdev on all failure exits.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index dcf37cada3697..e664b0b704891 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -941,8 +941,10 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
 	 * hooks: /n/, see "layering violations".
 	 */
 	ret = devcgroup_inode_permission(bdev->bd_inode, perm);
-	if (ret != 0)
+	if (ret != 0) {
+		bdput(bdev);
 		return ret;
+	}
 
 	ret = -ENXIO;
 	file->f_mapping = bdev->bd_inode->i_mapping;

From 67935df49dae836fa86621861979fafdfd37ae59 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 26 Jul 2008 11:18:28 +0400
Subject: [PATCH 07/12] [PATCH 1/2] proc: fix inode number bogorithmetic

Id which proc gets from IDR for inode number and id which proc removes
from IDR do not match. E.g. 0x11a transforms into 0x8000011a.

Which stayed unnoticed for a long time because, surprise, idr_remove()
masks out that high bit before doing anything.

All of this due to "| ~MAX_ID_MASK" in release_inode_number().

I still don't understand how it's supposed to work, because "| ~MASK"
is not an inversion for "& MAX" operation.

So, use just one nice, working addition. Make start offset unsigned int,
while I'm at it. It's longness is not used anywhere.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index cb4096cc3fb74..b85e36e153bad 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -303,7 +303,7 @@ static int xlate_proc_name(const char *name,
 static DEFINE_IDR(proc_inum_idr);
 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
 
-#define PROC_DYNAMIC_FIRST 0xF0000000UL
+#define PROC_DYNAMIC_FIRST 0xF0000000U
 
 /*
  * Return an inode number between PROC_DYNAMIC_FIRST and
@@ -311,7 +311,7 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
  */
 static unsigned int get_inode_number(void)
 {
-	int i, inum = 0;
+	unsigned int i;
 	int error;
 
 retry:
@@ -326,21 +326,18 @@ static unsigned int get_inode_number(void)
 	else if (error)
 		return 0;
 
-	inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST;
-
-	/* inum will never be more than 0xf0ffffff, so no check
-	 * for overflow.
-	 */
-
-	return inum;
+	if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
+		spin_lock(&proc_inum_lock);
+		idr_remove(&proc_inum_idr, i);
+		spin_unlock(&proc_inum_lock);
+	}
+	return PROC_DYNAMIC_FIRST + i;
 }
 
 static void release_inode_number(unsigned int inum)
 {
-	int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
-
 	spin_lock(&proc_inum_lock);
-	idr_remove(&proc_inum_idr, id);
+	idr_remove(&proc_inum_idr, inum - PROC_DYNAMIC_FIRST);
 	spin_unlock(&proc_inum_lock);
 }
 

From 9a18540915faaaadd7f71c16fa877a0c19675923 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 26 Jul 2008 11:21:37 +0400
Subject: [PATCH 08/12] [PATCH 2/2] proc: switch inode number allocation to IDA

proc doesn't use "associate pointer with id" feature of IDR, so switch
to IDA.

NOTE, NOTE, NOTE:
	Do not apply if release_inode_number() still mantions MAX_ID_MASK!

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b85e36e153bad..4fb81e9c94e3b 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -300,7 +300,7 @@ static int xlate_proc_name(const char *name,
 	return rtn;
 }
 
-static DEFINE_IDR(proc_inum_idr);
+static DEFINE_IDA(proc_inum_ida);
 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
 
 #define PROC_DYNAMIC_FIRST 0xF0000000U
@@ -315,11 +315,11 @@ static unsigned int get_inode_number(void)
 	int error;
 
 retry:
-	if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0)
+	if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
 		return 0;
 
 	spin_lock(&proc_inum_lock);
-	error = idr_get_new(&proc_inum_idr, NULL, &i);
+	error = ida_get_new(&proc_inum_ida, &i);
 	spin_unlock(&proc_inum_lock);
 	if (error == -EAGAIN)
 		goto retry;
@@ -328,7 +328,7 @@ static unsigned int get_inode_number(void)
 
 	if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
 		spin_lock(&proc_inum_lock);
-		idr_remove(&proc_inum_idr, i);
+		ida_remove(&proc_inum_ida, i);
 		spin_unlock(&proc_inum_lock);
 	}
 	return PROC_DYNAMIC_FIRST + i;
@@ -337,7 +337,7 @@ static unsigned int get_inode_number(void)
 static void release_inode_number(unsigned int inum)
 {
 	spin_lock(&proc_inum_lock);
-	idr_remove(&proc_inum_idr, inum - PROC_DYNAMIC_FIRST);
+	ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
 	spin_unlock(&proc_inum_lock);
 }
 

From 7ee7c12b7121cd49d528de219e4ffd5459657998 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 26 Jul 2008 11:42:16 +0400
Subject: [PATCH 09/12] [PATCH] devpts: switch to IDA

Devpts code wants just numbers for tty indexes.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/devpts/inode.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 285b64a8b06e1..488eb424f6621 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -29,7 +29,7 @@
 #define DEVPTS_DEFAULT_MODE 0600
 
 extern int pty_limit;			/* Config limit on Unix98 ptys */
-static DEFINE_IDR(allocated_ptys);
+static DEFINE_IDA(allocated_ptys);
 static DEFINE_MUTEX(allocated_ptys_lock);
 
 static struct vfsmount *devpts_mnt;
@@ -180,24 +180,24 @@ static struct dentry *get_node(int num)
 int devpts_new_index(void)
 {
 	int index;
-	int idr_ret;
+	int ida_ret;
 
 retry:
-	if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
+	if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) {
 		return -ENOMEM;
 	}
 
 	mutex_lock(&allocated_ptys_lock);
-	idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
-	if (idr_ret < 0) {
+	ida_ret = ida_get_new(&allocated_ptys, &index);
+	if (ida_ret < 0) {
 		mutex_unlock(&allocated_ptys_lock);
-		if (idr_ret == -EAGAIN)
+		if (ida_ret == -EAGAIN)
 			goto retry;
 		return -EIO;
 	}
 
 	if (index >= pty_limit) {
-		idr_remove(&allocated_ptys, index);
+		ida_remove(&allocated_ptys, index);
 		mutex_unlock(&allocated_ptys_lock);
 		return -EIO;
 	}
@@ -208,7 +208,7 @@ int devpts_new_index(void)
 void devpts_kill_index(int idx)
 {
 	mutex_lock(&allocated_ptys_lock);
-	idr_remove(&allocated_ptys, idx);
+	ida_remove(&allocated_ptys, idx);
 	mutex_unlock(&allocated_ptys_lock);
 }
 

From a95164d979c5ca061f15bcaadc829c146693d4d9 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 30 Jul 2008 15:08:48 +0200
Subject: [PATCH 10/12] [patch 3/4] vfs: remove unused nameidata argument of
 may_create()

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index b91e9732b24a6..4ea63ed5e7910 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1431,8 +1431,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
  *  3. We should have write and exec permissions on dir
  *  4. We can't do it if dir is immutable (done in permission())
  */
-static inline int may_create(struct inode *dir, struct dentry *child,
-			     struct nameidata *nd)
+static inline int may_create(struct inode *dir, struct dentry *child)
 {
 	if (child->d_inode)
 		return -EEXIST;
@@ -1504,7 +1503,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
 int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 		struct nameidata *nd)
 {
-	int error = may_create(dir, dentry, nd);
+	int error = may_create(dir, dentry);
 
 	if (error)
 		return error;
@@ -1948,7 +1947,7 @@ EXPORT_SYMBOL_GPL(lookup_create);
 
 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 {
-	int error = may_create(dir, dentry, NULL);
+	int error = may_create(dir, dentry);
 
 	if (error)
 		return error;
@@ -2049,7 +2048,7 @@ asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
 
 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
-	int error = may_create(dir, dentry, NULL);
+	int error = may_create(dir, dentry);
 
 	if (error)
 		return error;
@@ -2316,7 +2315,7 @@ asmlinkage long sys_unlink(const char __user *pathname)
 
 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
 {
-	int error = may_create(dir, dentry, NULL);
+	int error = may_create(dir, dentry);
 
 	if (error)
 		return error;
@@ -2386,7 +2385,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 	if (!inode)
 		return -ENOENT;
 
-	error = may_create(dir, new_dentry, NULL);
+	error = may_create(dir, new_dentry);
 	if (error)
 		return error;
 
@@ -2595,7 +2594,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		return error;
 
 	if (!new_dentry->d_inode)
-		error = may_create(new_dir, new_dentry, NULL);
+		error = may_create(new_dir, new_dentry);
 	else
 		error = may_delete(new_dir, new_dentry, is_dir);
 	if (error)

From d5686b444ff3f72808d2b3fbd58672a86cdf38e7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 1 Aug 2008 05:00:11 -0400
Subject: [PATCH 11/12] [PATCH] switch mtd and dm-table to lookup_bdev()

No need to open-code it...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/md/dm-table.c  | 29 ++++++-----------------------
 drivers/mtd/mtdsuper.c | 33 +++++++++++----------------------
 fs/block_dev.c         |  1 +
 3 files changed, 18 insertions(+), 45 deletions(-)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 798e468103b87..61f441409234e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -316,29 +316,12 @@ static inline int check_space(struct dm_table *t)
  */
 static int lookup_device(const char *path, dev_t *dev)
 {
-	int r;
-	struct nameidata nd;
-	struct inode *inode;
-
-	if ((r = path_lookup(path, LOOKUP_FOLLOW, &nd)))
-		return r;
-
-	inode = nd.path.dentry->d_inode;
-	if (!inode) {
-		r = -ENOENT;
-		goto out;
-	}
-
-	if (!S_ISBLK(inode->i_mode)) {
-		r = -ENOTBLK;
-		goto out;
-	}
-
-	*dev = inode->i_rdev;
-
- out:
-	path_put(&nd.path);
-	return r;
+	struct block_device *bdev = lookup_bdev(path);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+	*dev = bdev->bd_dev;
+	bdput(bdev);
+	return 0;
 }
 
 /*
diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index 28cc6787a800c..9b6af7e74a654 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -125,7 +125,7 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags,
 	       int (*fill_super)(struct super_block *, void *, int),
 	       struct vfsmount *mnt)
 {
-	struct nameidata nd;
+	struct block_device *bdev;
 	int mtdnr, ret;
 
 	if (!dev_name)
@@ -181,29 +181,20 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags,
 	/* try the old way - the hack where we allowed users to mount
 	 * /dev/mtdblock$(n) but didn't actually _use_ the blockdev
 	 */
-	ret = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
-
-	DEBUG(1, "MTDSB: path_lookup() returned %d, inode %p\n",
-	      ret, nd.path.dentry ? nd.path.dentry->d_inode : NULL);
-
-	if (ret)
+	bdev = lookup_bdev(dev_name);
+	if (IS_ERR(bdev)) {
+		ret = PTR_ERR(bdev);
+		DEBUG(1, "MTDSB: lookup_bdev() returned %d\n", ret);
 		return ret;
-
-	ret = -EINVAL;
-
-	if (!S_ISBLK(nd.path.dentry->d_inode->i_mode))
-		goto out;
-
-	if (nd.path.mnt->mnt_flags & MNT_NODEV) {
-		ret = -EACCES;
-		goto out;
 	}
+	DEBUG(1, "MTDSB: lookup_bdev() returned 0\n");
 
-	if (imajor(nd.path.dentry->d_inode) != MTD_BLOCK_MAJOR)
+	ret = -EINVAL;
+	if (MAJOR(bdev->bd_dev) != MTD_BLOCK_MAJOR)
 		goto not_an_MTD_device;
 
-	mtdnr = iminor(nd.path.dentry->d_inode);
-	path_put(&nd.path);
+	mtdnr = MINOR(bdev->bd_dev);
+	bdput(bdev);
 
 	return get_sb_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super,
 			     mnt);
@@ -213,10 +204,8 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags,
 		printk(KERN_NOTICE
 		       "MTD: Attempt to mount non-MTD device \"%s\"\n",
 		       dev_name);
-out:
-	path_put(&nd.path);
+	bdput(bdev);
 	return ret;
-
 }
 
 EXPORT_SYMBOL_GPL(get_sb_mtd);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e664b0b704891..aff54219e0495 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1236,6 +1236,7 @@ struct block_device *lookup_bdev(const char *path)
 	bdev = ERR_PTR(error);
 	goto out;
 }
+EXPORT_SYMBOL(lookup_bdev);
 
 /**
  * open_bdev_excl  -  open a block device by name and set it up for use

From 8d66bf5481002b0960aa49aed0987c73f5d7816c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 1 Aug 2008 09:05:54 -0400
Subject: [PATCH 12/12] [PATCH] pass struct path * to do_add_mount()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/mntpt.c         |  2 +-
 fs/cifs/cifs_dfs_ref.c |  2 +-
 fs/namespace.c         | 16 ++++++++--------
 fs/nfs/namespace.c     |  2 +-
 include/linux/mount.h  |  3 ++-
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2f5503902c375..78db4953a8004 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	}
 
 	mntget(newmnt);
-	err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
+	err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
 	switch (err) {
 	case 0:
 		path_put(&nd->path);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index d82374c9e3296..d2c8eef84f3c7 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
 	int err;
 
 	mntget(newmnt);
-	err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist);
+	err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
 	switch (err) {
 	case 0:
 		path_put(&nd->path);
diff --git a/fs/namespace.c b/fs/namespace.c
index 411728c0c8bb2..6e283c93b50da 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1667,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
-	return do_add_mount(mnt, nd, mnt_flags, NULL);
+	return do_add_mount(mnt, &nd->path, mnt_flags, NULL);
 }
 
 /*
  * add a mount into a namespace's mount tree
  * - provide the option of adding the new mount to an expiration list
  */
-int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
+int do_add_mount(struct vfsmount *newmnt, struct path *path,
 		 int mnt_flags, struct list_head *fslist)
 {
 	int err;
 
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
-	while (d_mountpoint(nd->path.dentry) &&
-	       follow_down(&nd->path.mnt, &nd->path.dentry))
+	while (d_mountpoint(path->dentry) &&
+	       follow_down(&path->mnt, &path->dentry))
 		;
 	err = -EINVAL;
-	if (!check_mnt(nd->path.mnt))
+	if (!check_mnt(path->mnt))
 		goto unlock;
 
 	/* Refuse the same filesystem on the same mount point */
 	err = -EBUSY;
-	if (nd->path.mnt->mnt_sb == newmnt->mnt_sb &&
-	    nd->path.mnt->mnt_root == nd->path.dentry)
+	if (path->mnt->mnt_sb == newmnt->mnt_sb &&
+	    path->mnt->mnt_root == path->dentry)
 		goto unlock;
 
 	err = -EINVAL;
@@ -1699,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
 		goto unlock;
 
 	newmnt->mnt_flags = mnt_flags;
-	if ((err = graft_tree(newmnt, &nd->path)))
+	if ((err = graft_tree(newmnt, path)))
 		goto unlock;
 
 	if (fslist) /* add to the specified expiration list */
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2f285ef76399f..66df08dd1cafb 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 		goto out_err;
 
 	mntget(mnt);
-	err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
+	err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
 			   &nfs_automount_list);
 	if (err < 0) {
 		mntput(mnt);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index b5efaa2132ab2..30a1d63b6fb5c 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -105,7 +105,8 @@ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 
 struct nameidata;
 
-extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
+struct path;
+extern int do_add_mount(struct vfsmount *newmnt, struct path *path,
 			int mnt_flags, struct list_head *fslist);
 
 extern void mark_mounts_for_expiry(struct list_head *mounts);