From a6a295a31168eafb4049a81f2db7bedc339da75e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 19:37:57 -0800 Subject: [PATCH] ipv6: add complete rcu protection around np->opt [ Upstream commit 45f6fad84cc305103b28d73482b344d7f5b76f39 ] This patch addresses multiple problems : UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions while socket is not locked : Other threads can change np->opt concurrently. Dmitry posted a syzkaller (http://github.com/google/syzkaller) program desmonstrating use-after-free. Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock() and dccp_v6_request_recv_sock() also need to use RCU protection to dereference np->opt once (before calling ipv6_dup_options()) This patch adds full RCU protection to np->opt BUG: 28746669 Change-Id: I207da29ac48bb6dd7c40d65f9e27c4e3ff508da0 Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Jiri Slaby Signed-off-by: Pierre Imai --- include/linux/ipv6.h | 2 +- include/net/ipv6.h | 21 ++++++++++++++++++++- net/dccp/ipv6.c | 39 +++++++++++++++++++++------------------ net/ipv6/af_inet6.c | 12 +++++++++--- net/ipv6/datagram.c | 4 +++- net/ipv6/exthdrs.c | 3 ++- net/ipv6/inet6_connection_sock.c | 11 ++++++++--- net/ipv6/ipv6_sockglue.c | 36 ++++++++++++++++++++++++------------ net/ipv6/raw.c | 8 ++++++-- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 28 +++++++++++++++------------- net/ipv6/udp.c | 8 ++++++-- 12 files changed, 116 insertions(+), 58 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index d5041862ed6cc..8e3f2cb7d7cf1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -382,7 +382,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f3d9b54e81d4d..1f455db905900 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -203,6 +203,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { + atomic_t refcnt; /* Length of this structure */ int tot_len; @@ -215,7 +216,7 @@ struct ipv6_txoptions { struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; - + struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ }; @@ -241,6 +242,24 @@ struct ipv6_fl_socklist { struct ip6_flowlabel *fl; }; +static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) +{ + struct ipv6_txoptions *opt; + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt && !atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + rcu_read_unlock(); + return opt; +} + +static inline void txopt_put(struct ipv6_txoptions *opt) +{ + if (opt && atomic_dec_and_test(&opt->refcnt)) + kfree_rcu(opt, rcu); +} + extern struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); extern struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space, struct ip6_flowlabel * fl, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4dc588f520e04..95fd5ec945f03 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -253,9 +253,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl6.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - opt = np->opt; - - final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { @@ -272,13 +272,14 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, &ireq6->loc_addr, &ireq6->rmt_addr); fl6.daddr = ireq6->rmt_addr; - err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); + rcu_read_lock(); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } done: - if (opt != NULL && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); return err; } @@ -469,6 +470,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; @@ -594,16 +596,16 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ - if (opt != NULL) { - newnp->opt = ipv6_dup_options(newsk, opt); - if (opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); - } + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt != NULL) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); @@ -856,6 +858,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -958,7 +961,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { @@ -978,9 +982,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; - if (np->opt != NULL) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 296886bff7348..69b587e3a5cbb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -448,8 +448,11 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ - if ((opt = xchg(&np->opt, NULL)) != NULL) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -705,7 +708,10 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), + &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ae4d713ac88d8..2659d0028bb12 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -167,8 +167,10 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = flowlabel ? flowlabel->opt : np->opt; + rcu_read_lock(); + opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); err = 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3d641b6e9b092..e66773850e50d 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -748,6 +748,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) *((char**)&opt2->dst1opt) += dif; if (opt2->srcrt) *((char**)&opt2->srcrt) += dif; + atomic_set(&opt2->refcnt, 1); } return opt2; } @@ -812,7 +813,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return ERR_PTR(-ENOBUFS); memset(opt2, 0, tot_len); - + atomic_set(&opt2->refcnt, 1); opt2->tot_len = tot_len; p = (char *)(opt2 + 1); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index aefc8b7180951..67aa2c2b502c9 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -66,7 +66,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = treq->rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); fl6.saddr = treq->loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = inet_rsk(req)->ir_mark; @@ -227,7 +229,9 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = __inet6_csk_dst_check(sk, np->dst_cookie); @@ -250,7 +254,8 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) /* Restore final destination back after routing done */ fl6.daddr = np->daddr; - res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 63dd1f89ed7de..601360e6bb839 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -110,10 +110,12 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); } else { spin_lock(&sk->sk_dst_lock); - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); spin_unlock(&sk->sk_dst_lock); } sk_dst_reset(sk); @@ -213,9 +215,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, + NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); @@ -384,7 +389,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) break; - opt = ipv6_renew_options(sk, np->opt, optname, + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); if (IS_ERR(opt)) { @@ -413,8 +419,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); sticky_done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } @@ -467,6 +475,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; memset(opt, 0, sizeof(*opt)); + atomic_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_user(opt+1, optval, optlen)) @@ -483,8 +492,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } case IPV6_UNICAST_HOPS: @@ -1084,10 +1095,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_RTHDR: case IPV6_DSTOPTS: { + struct ipv6_txoptions *opt; lock_sock(sk); - len = ipv6_getsockopt_sticky(sk, np->opt, - optname, optval, len); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 49ec3f8e7ceaa..dbe4e09ee7c5f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -728,6 +728,7 @@ static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; struct in6_addr *daddr, *final_p, final; @@ -835,8 +836,10 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -903,6 +906,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err<0?err:len; do_confirm: dst_confirm(dst); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index af939afeae226..b57996a4fd66d 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -240,7 +240,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = ireq6->rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq6->loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = inet_rsk(req)->ir_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c4de212ad12c2..c39a2f47dd8c7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -132,6 +132,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct rt6_info *rt; struct flowi6 fl6; struct dst_entry *dst; @@ -253,7 +254,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sock_i_uid(sk); - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -296,9 +298,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } icsk->icsk_ext_hdr_len = 0; - if (np->opt) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -516,7 +518,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); fl6.daddr = treq->rmt_addr; - err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); err = net_xmit_eval(err); } @@ -1243,10 +1246,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct inet6_request_sock *treq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; - struct ipv6_txoptions *opt; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif @@ -1375,16 +1378,15 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, but we make one more one thing there: reattach optmem to newsk. */ + opt = rcu_dereference(np->opt); if (opt) { - newnp->opt = ipv6_dup_options(newsk, opt); - if (opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); } - inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a20d55dc9c2aa..101d2ba8df281 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -955,6 +955,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; + struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi6 fl6; struct dst_entry *dst; @@ -1109,8 +1110,10 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = NULL; connected = 0; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -1211,6 +1214,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, out: dst_release(dst); fl6_sock_release(flowlabel); + txopt_put(opt_to_free); if (!err) return len; /*