mirror of
https://github.com/Divested-Mobile/DivestOS-Build.git
synced 2025-01-11 23:49:34 -05:00
321 lines
9.6 KiB
Diff
321 lines
9.6 KiB
Diff
|
From 7d267278a9ece963d77eefec61630223fce08c6c Mon Sep 17 00:00:00 2001
|
||
|
From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
|
||
|
Date: Fri, 20 Nov 2015 22:07:23 +0000
|
||
|
Subject: unix: avoid use-after-free in ep_remove_wait_queue
|
||
|
|
||
|
Rainer Weikusat <rweikusat@mobileactivedefense.com> writes:
|
||
|
An AF_UNIX datagram socket being the client in an n:1 association with
|
||
|
some server socket is only allowed to send messages to the server if the
|
||
|
receive queue of this socket contains at most sk_max_ack_backlog
|
||
|
datagrams. This implies that prospective writers might be forced to go
|
||
|
to sleep despite none of the message presently enqueued on the server
|
||
|
receive queue were sent by them. In order to ensure that these will be
|
||
|
woken up once space becomes again available, the present unix_dgram_poll
|
||
|
routine does a second sock_poll_wait call with the peer_wait wait queue
|
||
|
of the server socket as queue argument (unix_dgram_recvmsg does a wake
|
||
|
up on this queue after a datagram was received). This is inherently
|
||
|
problematic because the server socket is only guaranteed to remain alive
|
||
|
for as long as the client still holds a reference to it. In case the
|
||
|
connection is dissolved via connect or by the dead peer detection logic
|
||
|
in unix_dgram_sendmsg, the server socket may be freed despite "the
|
||
|
polling mechanism" (in particular, epoll) still has a pointer to the
|
||
|
corresponding peer_wait queue. There's no way to forcibly deregister a
|
||
|
wait queue with epoll.
|
||
|
|
||
|
Based on an idea by Jason Baron, the patch below changes the code such
|
||
|
that a wait_queue_t belonging to the client socket is enqueued on the
|
||
|
peer_wait queue of the server whenever the peer receive queue full
|
||
|
condition is detected by either a sendmsg or a poll. A wake up on the
|
||
|
peer queue is then relayed to the ordinary wait queue of the client
|
||
|
socket via wake function. The connection to the peer wait queue is again
|
||
|
dissolved if either a wake up is about to be relayed or the client
|
||
|
socket reconnects or a dead peer is detected or the client socket is
|
||
|
itself closed. This enables removing the second sock_poll_wait from
|
||
|
unix_dgram_poll, thus avoiding the use-after-free, while still ensuring
|
||
|
that no blocked writer sleeps forever.
|
||
|
|
||
|
Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
|
||
|
Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets")
|
||
|
Reviewed-by: Jason Baron <jbaron@akamai.com>
|
||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||
|
---
|
||
|
net/unix/af_unix.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++------
|
||
|
1 file changed, 164 insertions(+), 19 deletions(-)
|
||
|
|
||
|
(limited to 'net/unix/af_unix.c')
|
||
|
|
||
|
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
|
||
|
index 955ec15..4e95bdf 100644
|
||
|
--- a/net/unix/af_unix.c
|
||
|
+++ b/net/unix/af_unix.c
|
||
|
@@ -326,6 +326,118 @@ found:
|
||
|
return s;
|
||
|
}
|
||
|
|
||
|
+/* Support code for asymmetrically connected dgram sockets
|
||
|
+ *
|
||
|
+ * If a datagram socket is connected to a socket not itself connected
|
||
|
+ * to the first socket (eg, /dev/log), clients may only enqueue more
|
||
|
+ * messages if the present receive queue of the server socket is not
|
||
|
+ * "too large". This means there's a second writeability condition
|
||
|
+ * poll and sendmsg need to test. The dgram recv code will do a wake
|
||
|
+ * up on the peer_wait wait queue of a socket upon reception of a
|
||
|
+ * datagram which needs to be propagated to sleeping would-be writers
|
||
|
+ * since these might not have sent anything so far. This can't be
|
||
|
+ * accomplished via poll_wait because the lifetime of the server
|
||
|
+ * socket might be less than that of its clients if these break their
|
||
|
+ * association with it or if the server socket is closed while clients
|
||
|
+ * are still connected to it and there's no way to inform "a polling
|
||
|
+ * implementation" that it should let go of a certain wait queue
|
||
|
+ *
|
||
|
+ * In order to propagate a wake up, a wait_queue_t of the client
|
||
|
+ * socket is enqueued on the peer_wait queue of the server socket
|
||
|
+ * whose wake function does a wake_up on the ordinary client socket
|
||
|
+ * wait queue. This connection is established whenever a write (or
|
||
|
+ * poll for write) hit the flow control condition and broken when the
|
||
|
+ * association to the server socket is dissolved or after a wake up
|
||
|
+ * was relayed.
|
||
|
+ */
|
||
|
+
|
||
|
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
|
||
|
+ void *key)
|
||
|
+{
|
||
|
+ struct unix_sock *u;
|
||
|
+ wait_queue_head_t *u_sleep;
|
||
|
+
|
||
|
+ u = container_of(q, struct unix_sock, peer_wake);
|
||
|
+
|
||
|
+ __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
|
||
|
+ q);
|
||
|
+ u->peer_wake.private = NULL;
|
||
|
+
|
||
|
+ /* relaying can only happen while the wq still exists */
|
||
|
+ u_sleep = sk_sleep(&u->sk);
|
||
|
+ if (u_sleep)
|
||
|
+ wake_up_interruptible_poll(u_sleep, key);
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
|
||
|
+{
|
||
|
+ struct unix_sock *u, *u_other;
|
||
|
+ int rc;
|
||
|
+
|
||
|
+ u = unix_sk(sk);
|
||
|
+ u_other = unix_sk(other);
|
||
|
+ rc = 0;
|
||
|
+ spin_lock(&u_other->peer_wait.lock);
|
||
|
+
|
||
|
+ if (!u->peer_wake.private) {
|
||
|
+ u->peer_wake.private = other;
|
||
|
+ __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
|
||
|
+
|
||
|
+ rc = 1;
|
||
|
+ }
|
||
|
+
|
||
|
+ spin_unlock(&u_other->peer_wait.lock);
|
||
|
+ return rc;
|
||
|
+}
|
||
|
+
|
||
|
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
|
||
|
+ struct sock *other)
|
||
|
+{
|
||
|
+ struct unix_sock *u, *u_other;
|
||
|
+
|
||
|
+ u = unix_sk(sk);
|
||
|
+ u_other = unix_sk(other);
|
||
|
+ spin_lock(&u_other->peer_wait.lock);
|
||
|
+
|
||
|
+ if (u->peer_wake.private == other) {
|
||
|
+ __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
|
||
|
+ u->peer_wake.private = NULL;
|
||
|
+ }
|
||
|
+
|
||
|
+ spin_unlock(&u_other->peer_wait.lock);
|
||
|
+}
|
||
|
+
|
||
|
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
|
||
|
+ struct sock *other)
|
||
|
+{
|
||
|
+ unix_dgram_peer_wake_disconnect(sk, other);
|
||
|
+ wake_up_interruptible_poll(sk_sleep(sk),
|
||
|
+ POLLOUT |
|
||
|
+ POLLWRNORM |
|
||
|
+ POLLWRBAND);
|
||
|
+}
|
||
|
+
|
||
|
+/* preconditions:
|
||
|
+ * - unix_peer(sk) == other
|
||
|
+ * - association is stable
|
||
|
+ */
|
||
|
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
|
||
|
+{
|
||
|
+ int connected;
|
||
|
+
|
||
|
+ connected = unix_dgram_peer_wake_connect(sk, other);
|
||
|
+
|
||
|
+ if (unix_recvq_full(other))
|
||
|
+ return 1;
|
||
|
+
|
||
|
+ if (connected)
|
||
|
+ unix_dgram_peer_wake_disconnect(sk, other);
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
static int unix_writable(const struct sock *sk)
|
||
|
{
|
||
|
return sk->sk_state != TCP_LISTEN &&
|
||
|
@@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
|
||
|
skpair->sk_state_change(skpair);
|
||
|
sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
|
||
|
}
|
||
|
+
|
||
|
+ unix_dgram_peer_wake_disconnect(sk, skpair);
|
||
|
sock_put(skpair); /* It may now die */
|
||
|
unix_peer(sk) = NULL;
|
||
|
}
|
||
|
@@ -666,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
|
||
|
INIT_LIST_HEAD(&u->link);
|
||
|
mutex_init(&u->readlock); /* single task reading lock */
|
||
|
init_waitqueue_head(&u->peer_wait);
|
||
|
+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
|
||
|
unix_insert_socket(unix_sockets_unbound(sk), sk);
|
||
|
out:
|
||
|
if (sk == NULL)
|
||
|
@@ -1033,6 +1148,8 @@ restart:
|
||
|
if (unix_peer(sk)) {
|
||
|
struct sock *old_peer = unix_peer(sk);
|
||
|
unix_peer(sk) = other;
|
||
|
+ unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
|
||
|
+
|
||
|
unix_state_double_unlock(sk, other);
|
||
|
|
||
|
if (other != old_peer)
|
||
|
@@ -1472,6 +1589,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
|
||
|
struct scm_cookie scm;
|
||
|
int max_level;
|
||
|
int data_len = 0;
|
||
|
+ int sk_locked;
|
||
|
|
||
|
wait_for_unix_gc();
|
||
|
err = scm_send(sock, msg, &scm, false);
|
||
|
@@ -1550,12 +1668,14 @@ restart:
|
||
|
goto out_free;
|
||
|
}
|
||
|
|
||
|
+ sk_locked = 0;
|
||
|
unix_state_lock(other);
|
||
|
+restart_locked:
|
||
|
err = -EPERM;
|
||
|
if (!unix_may_send(sk, other))
|
||
|
goto out_unlock;
|
||
|
|
||
|
- if (sock_flag(other, SOCK_DEAD)) {
|
||
|
+ if (unlikely(sock_flag(other, SOCK_DEAD))) {
|
||
|
/*
|
||
|
* Check with 1003.1g - what should
|
||
|
* datagram error
|
||
|
@@ -1563,10 +1683,14 @@ restart:
|
||
|
unix_state_unlock(other);
|
||
|
sock_put(other);
|
||
|
|
||
|
+ if (!sk_locked)
|
||
|
+ unix_state_lock(sk);
|
||
|
+
|
||
|
err = 0;
|
||
|
- unix_state_lock(sk);
|
||
|
if (unix_peer(sk) == other) {
|
||
|
unix_peer(sk) = NULL;
|
||
|
+ unix_dgram_peer_wake_disconnect_wakeup(sk, other);
|
||
|
+
|
||
|
unix_state_unlock(sk);
|
||
|
|
||
|
unix_dgram_disconnected(sk, other);
|
||
|
@@ -1592,21 +1716,38 @@ restart:
|
||
|
goto out_unlock;
|
||
|
}
|
||
|
|
||
|
- if (unix_peer(other) != sk && unix_recvq_full(other)) {
|
||
|
- if (!timeo) {
|
||
|
- err = -EAGAIN;
|
||
|
- goto out_unlock;
|
||
|
+ if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
|
||
|
+ if (timeo) {
|
||
|
+ timeo = unix_wait_for_peer(other, timeo);
|
||
|
+
|
||
|
+ err = sock_intr_errno(timeo);
|
||
|
+ if (signal_pending(current))
|
||
|
+ goto out_free;
|
||
|
+
|
||
|
+ goto restart;
|
||
|
}
|
||
|
|
||
|
- timeo = unix_wait_for_peer(other, timeo);
|
||
|
+ if (!sk_locked) {
|
||
|
+ unix_state_unlock(other);
|
||
|
+ unix_state_double_lock(sk, other);
|
||
|
+ }
|
||
|
|
||
|
- err = sock_intr_errno(timeo);
|
||
|
- if (signal_pending(current))
|
||
|
- goto out_free;
|
||
|
+ if (unix_peer(sk) != other ||
|
||
|
+ unix_dgram_peer_wake_me(sk, other)) {
|
||
|
+ err = -EAGAIN;
|
||
|
+ sk_locked = 1;
|
||
|
+ goto out_unlock;
|
||
|
+ }
|
||
|
|
||
|
- goto restart;
|
||
|
+ if (!sk_locked) {
|
||
|
+ sk_locked = 1;
|
||
|
+ goto restart_locked;
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
+ if (unlikely(sk_locked))
|
||
|
+ unix_state_unlock(sk);
|
||
|
+
|
||
|
if (sock_flag(other, SOCK_RCVTSTAMP))
|
||
|
__net_timestamp(skb);
|
||
|
maybe_add_creds(skb, sock, other);
|
||
|
@@ -1620,6 +1761,8 @@ restart:
|
||
|
return len;
|
||
|
|
||
|
out_unlock:
|
||
|
+ if (sk_locked)
|
||
|
+ unix_state_unlock(sk);
|
||
|
unix_state_unlock(other);
|
||
|
out_free:
|
||
|
kfree_skb(skb);
|
||
|
@@ -2476,14 +2619,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
|
||
|
return mask;
|
||
|
|
||
|
writable = unix_writable(sk);
|
||
|
- other = unix_peer_get(sk);
|
||
|
- if (other) {
|
||
|
- if (unix_peer(other) != sk) {
|
||
|
- sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
|
||
|
- if (unix_recvq_full(other))
|
||
|
- writable = 0;
|
||
|
- }
|
||
|
- sock_put(other);
|
||
|
+ if (writable) {
|
||
|
+ unix_state_lock(sk);
|
||
|
+
|
||
|
+ other = unix_peer(sk);
|
||
|
+ if (other && unix_peer(other) != sk &&
|
||
|
+ unix_recvq_full(other) &&
|
||
|
+ unix_dgram_peer_wake_me(sk, other))
|
||
|
+ writable = 0;
|
||
|
+
|
||
|
+ unix_state_unlock(sk);
|
||
|
}
|
||
|
|
||
|
if (writable)
|
||
|
--
|
||
|
cgit v1.1
|
||
|
|