mirror of
https://github.com/Divested-Mobile/DivestOS-Build.git
synced 2024-10-01 01:35:54 -04:00
251 lines
8.6 KiB
Diff
251 lines
8.6 KiB
Diff
|
From 759c01142a5d0f364a462346168a56de28a80f52 Mon Sep 17 00:00:00 2001
|
||
|
From: Willy Tarreau <w@1wt.eu>
|
||
|
Date: Mon, 18 Jan 2016 16:36:09 +0100
|
||
|
Subject: pipe: limit the per-user amount of pages allocated in pipes
|
||
|
|
||
|
On not-so-small systems, it is possible for a single process to cause an
|
||
|
OOM condition by filling large pipes with data that are never read. A
|
||
|
typical process filling 4000 pipes with 1 MB of data will use 4 GB of
|
||
|
memory. On small systems it may be tricky to set the pipe max size to
|
||
|
prevent this from happening.
|
||
|
|
||
|
This patch makes it possible to enforce a per-user soft limit above
|
||
|
which new pipes will be limited to a single page, effectively limiting
|
||
|
them to 4 kB each, as well as a hard limit above which no new pipes may
|
||
|
be created for this user. This has the effect of protecting the system
|
||
|
against memory abuse without hurting other users, and still allowing
|
||
|
pipes to work correctly though with less data at once.
|
||
|
|
||
|
The limits are controlled by two new sysctls: pipe-user-pages-soft and
|
||
|
pipe-user-pages-hard. Both may be disabled by setting them to zero. The
|
||
|
default soft limit allows the default number of FDs per process (1024)
|
||
|
to create pipes of the default size (64kB), thus reaching a limit of 64MB
|
||
|
before starting to create only smaller pipes. With 256 processes limited
|
||
|
to 1024 FDs each, this results in 1024*64kB + (256*1024 - 1024) * 4kB =
|
||
|
1084 MB of memory allocated for a user. The hard limit is disabled by
|
||
|
default to avoid breaking existing applications that make intensive use
|
||
|
of pipes (eg: for splicing).
|
||
|
|
||
|
Reported-by: socketpair@gmail.com
|
||
|
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
|
||
|
Mitigates: CVE-2013-4312 (Linux 2.0+)
|
||
|
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||
|
Signed-off-by: Willy Tarreau <w@1wt.eu>
|
||
|
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
|
||
|
---
|
||
|
Documentation/sysctl/fs.txt | 23 ++++++++++++++++++++++
|
||
|
fs/pipe.c | 47 +++++++++++++++++++++++++++++++++++++++++++--
|
||
|
include/linux/pipe_fs_i.h | 4 ++++
|
||
|
include/linux/sched.h | 1 +
|
||
|
kernel/sysctl.c | 14 ++++++++++++++
|
||
|
5 files changed, 87 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
|
||
|
index 88152f2..302b5ed 100644
|
||
|
--- a/Documentation/sysctl/fs.txt
|
||
|
+++ b/Documentation/sysctl/fs.txt
|
||
|
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/fs:
|
||
|
- nr_open
|
||
|
- overflowuid
|
||
|
- overflowgid
|
||
|
+- pipe-user-pages-hard
|
||
|
+- pipe-user-pages-soft
|
||
|
- protected_hardlinks
|
||
|
- protected_symlinks
|
||
|
- suid_dumpable
|
||
|
@@ -159,6 +161,27 @@ The default is 65534.
|
||
|
|
||
|
==============================================================
|
||
|
|
||
|
+pipe-user-pages-hard:
|
||
|
+
|
||
|
+Maximum total number of pages a non-privileged user may allocate for pipes.
|
||
|
+Once this limit is reached, no new pipes may be allocated until usage goes
|
||
|
+below the limit again. When set to 0, no limit is applied, which is the default
|
||
|
+setting.
|
||
|
+
|
||
|
+==============================================================
|
||
|
+
|
||
|
+pipe-user-pages-soft:
|
||
|
+
|
||
|
+Maximum total number of pages a non-privileged user may allocate for pipes
|
||
|
+before the pipe size gets limited to a single page. Once this limit is reached,
|
||
|
+new pipes will be limited to a single page in size for this user in order to
|
||
|
+limit total memory usage, and trying to increase them using fcntl() will be
|
||
|
+denied until usage goes below the limit again. The default value allows to
|
||
|
+allocate up to 1024 pipes at their default size. When set to 0, no limit is
|
||
|
+applied.
|
||
|
+
|
||
|
+==============================================================
|
||
|
+
|
||
|
protected_hardlinks:
|
||
|
|
||
|
A long-standing class of security issues is the hardlink-based
|
||
|
diff --git a/fs/pipe.c b/fs/pipe.c
|
||
|
index 42cf8dd..ab8dad3 100644
|
||
|
--- a/fs/pipe.c
|
||
|
+++ b/fs/pipe.c
|
||
|
@@ -38,6 +38,12 @@ unsigned int pipe_max_size = 1048576;
|
||
|
*/
|
||
|
unsigned int pipe_min_size = PAGE_SIZE;
|
||
|
|
||
|
+/* Maximum allocatable pages per user. Hard limit is unset by default, soft
|
||
|
+ * matches default values.
|
||
|
+ */
|
||
|
+unsigned long pipe_user_pages_hard;
|
||
|
+unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
|
||
|
+
|
||
|
/*
|
||
|
* We use a start+len construction, which provides full use of the
|
||
|
* allocated memory.
|
||
|
@@ -583,20 +589,49 @@ pipe_fasync(int fd, struct file *filp, int on)
|
||
|
return retval;
|
||
|
}
|
||
|
|
||
|
+static void account_pipe_buffers(struct pipe_inode_info *pipe,
|
||
|
+ unsigned long old, unsigned long new)
|
||
|
+{
|
||
|
+ atomic_long_add(new - old, &pipe->user->pipe_bufs);
|
||
|
+}
|
||
|
+
|
||
|
+static bool too_many_pipe_buffers_soft(struct user_struct *user)
|
||
|
+{
|
||
|
+ return pipe_user_pages_soft &&
|
||
|
+ atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft;
|
||
|
+}
|
||
|
+
|
||
|
+static bool too_many_pipe_buffers_hard(struct user_struct *user)
|
||
|
+{
|
||
|
+ return pipe_user_pages_hard &&
|
||
|
+ atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard;
|
||
|
+}
|
||
|
+
|
||
|
struct pipe_inode_info *alloc_pipe_info(void)
|
||
|
{
|
||
|
struct pipe_inode_info *pipe;
|
||
|
|
||
|
pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
|
||
|
if (pipe) {
|
||
|
- pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
|
||
|
+ unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
|
||
|
+ struct user_struct *user = get_current_user();
|
||
|
+
|
||
|
+ if (!too_many_pipe_buffers_hard(user)) {
|
||
|
+ if (too_many_pipe_buffers_soft(user))
|
||
|
+ pipe_bufs = 1;
|
||
|
+ pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
|
||
|
+ }
|
||
|
+
|
||
|
if (pipe->bufs) {
|
||
|
init_waitqueue_head(&pipe->wait);
|
||
|
pipe->r_counter = pipe->w_counter = 1;
|
||
|
- pipe->buffers = PIPE_DEF_BUFFERS;
|
||
|
+ pipe->buffers = pipe_bufs;
|
||
|
+ pipe->user = user;
|
||
|
+ account_pipe_buffers(pipe, 0, pipe_bufs);
|
||
|
mutex_init(&pipe->mutex);
|
||
|
return pipe;
|
||
|
}
|
||
|
+ free_uid(user);
|
||
|
kfree(pipe);
|
||
|
}
|
||
|
|
||
|
@@ -607,6 +642,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
+ account_pipe_buffers(pipe, pipe->buffers, 0);
|
||
|
+ free_uid(pipe->user);
|
||
|
for (i = 0; i < pipe->buffers; i++) {
|
||
|
struct pipe_buffer *buf = pipe->bufs + i;
|
||
|
if (buf->ops)
|
||
|
@@ -998,6 +1035,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
|
||
|
memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
|
||
|
}
|
||
|
|
||
|
+ account_pipe_buffers(pipe, pipe->buffers, nr_pages);
|
||
|
pipe->curbuf = 0;
|
||
|
kfree(pipe->bufs);
|
||
|
pipe->bufs = bufs;
|
||
|
@@ -1069,6 +1107,11 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
|
||
|
if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
|
||
|
ret = -EPERM;
|
||
|
goto out;
|
||
|
+ } else if ((too_many_pipe_buffers_hard(pipe->user) ||
|
||
|
+ too_many_pipe_buffers_soft(pipe->user)) &&
|
||
|
+ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
|
||
|
+ ret = -EPERM;
|
||
|
+ goto out;
|
||
|
}
|
||
|
ret = pipe_set_size(pipe, nr_pages);
|
||
|
break;
|
||
|
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
|
||
|
index eb8b8ac..24f5470 100644
|
||
|
--- a/include/linux/pipe_fs_i.h
|
||
|
+++ b/include/linux/pipe_fs_i.h
|
||
|
@@ -42,6 +42,7 @@ struct pipe_buffer {
|
||
|
* @fasync_readers: reader side fasync
|
||
|
* @fasync_writers: writer side fasync
|
||
|
* @bufs: the circular array of pipe buffers
|
||
|
+ * @user: the user who created this pipe
|
||
|
**/
|
||
|
struct pipe_inode_info {
|
||
|
struct mutex mutex;
|
||
|
@@ -57,6 +58,7 @@ struct pipe_inode_info {
|
||
|
struct fasync_struct *fasync_readers;
|
||
|
struct fasync_struct *fasync_writers;
|
||
|
struct pipe_buffer *bufs;
|
||
|
+ struct user_struct *user;
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
@@ -123,6 +125,8 @@ void pipe_unlock(struct pipe_inode_info *);
|
||
|
void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
|
||
|
|
||
|
extern unsigned int pipe_max_size, pipe_min_size;
|
||
|
+extern unsigned long pipe_user_pages_hard;
|
||
|
+extern unsigned long pipe_user_pages_soft;
|
||
|
int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
|
||
|
|
||
|
|
||
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
||
|
index 61aa9bb..1589ddc8 100644
|
||
|
--- a/include/linux/sched.h
|
||
|
+++ b/include/linux/sched.h
|
||
|
@@ -835,6 +835,7 @@ struct user_struct {
|
||
|
#endif
|
||
|
unsigned long locked_shm; /* How many pages of mlocked shm ? */
|
||
|
unsigned long unix_inflight; /* How many files in flight in unix sockets */
|
||
|
+ atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */
|
||
|
|
||
|
#ifdef CONFIG_KEYS
|
||
|
struct key *uid_keyring; /* UID specific keyring */
|
||
|
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
||
|
index c810f8a..f6fd236 100644
|
||
|
--- a/kernel/sysctl.c
|
||
|
+++ b/kernel/sysctl.c
|
||
|
@@ -1757,6 +1757,20 @@ static struct ctl_table fs_table[] = {
|
||
|
.proc_handler = &pipe_proc_fn,
|
||
|
.extra1 = &pipe_min_size,
|
||
|
},
|
||
|
+ {
|
||
|
+ .procname = "pipe-user-pages-hard",
|
||
|
+ .data = &pipe_user_pages_hard,
|
||
|
+ .maxlen = sizeof(pipe_user_pages_hard),
|
||
|
+ .mode = 0644,
|
||
|
+ .proc_handler = proc_doulongvec_minmax,
|
||
|
+ },
|
||
|
+ {
|
||
|
+ .procname = "pipe-user-pages-soft",
|
||
|
+ .data = &pipe_user_pages_soft,
|
||
|
+ .maxlen = sizeof(pipe_user_pages_soft),
|
||
|
+ .mode = 0644,
|
||
|
+ .proc_handler = proc_doulongvec_minmax,
|
||
|
+ },
|
||
|
{ }
|
||
|
};
|
||
|
|
||
|
--
|
||
|
cgit v1.1
|
||
|
|