bpf: Extend the sk_lookup() helper to XDP hookpoint.

This patch extends the sk_lookup() BPF API to the
XDP hookpoint. The sk_lookup() helper supports a lookup
on an incoming packet to find the corresponding socket that will
receive this packet. Current support for this BPF API is
at the tc hookpoint. This patch extends the API to the XDP
hookpoint. An XDP program can map the incoming packet to the
5-tuple parameter and invoke the API to find the corresponding
socket structure.

Signed-off-by: Nitin Hande <Nitin.Hande@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Nitin Hande
2018-10-28 21:02:45 -07:00
committed by TechPanelGM
parent 1b176bd2e7
commit 2df8f1898b
2 changed files with 98 additions and 16 deletions

View File

@@ -2222,6 +2222,8 @@ union bpf_attr {
* **CONFIG_NET** configuration option. * **CONFIG_NET** configuration option.
* Return * Return
* Pointer to *struct bpf_sock*, or NULL in case of failure. * Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, *struct bpf_sock*
* return is from reuse->socks[] using hash of the packet.
* *
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
* Description * Description
@@ -2254,6 +2256,8 @@ union bpf_attr {
* **CONFIG_NET** configuration option. * **CONFIG_NET** configuration option.
* Return * Return
* Pointer to *struct bpf_sock*, or NULL in case of failure. * Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, *struct bpf_sock*
* return is from reuse->socks[] using hash of the packet.
* *
* int bpf_sk_release(struct bpf_sock *sk) * int bpf_sk_release(struct bpf_sock *sk)
* Description * Description

View File

@@ -3466,39 +3466,39 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
.arg3_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING,
}; };
#ifdef CONFIG_INET
struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
struct sk_buff *skb, u8 family, u8 proto) int dif, int sdif, u8 family, u8 proto)
{ {
int dif = skb->dev->ifindex;
bool refcounted = false; bool refcounted = false;
struct sock *sk = NULL; struct sock *sk = NULL;
if (family == AF_INET) { if (family == AF_INET) {
__be32 src4 = tuple->ipv4.saddr; __be32 src4 = tuple->ipv4.saddr;
__be32 dst4 = tuple->ipv4.daddr; __be32 dst4 = tuple->ipv4.daddr;
int sdif = inet_sdif(skb);
if (proto == IPPROTO_TCP) if (proto == IPPROTO_TCP)
sk = __inet_lookup(net, &tcp_hashinfo, skb, 0, sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
src4, tuple->ipv4.sport, src4, tuple->ipv4.sport,
dst4, tuple->ipv4.dport, dst4, tuple->ipv4.dport,
dif, sdif, &refcounted); dif, sdif, &refcounted);
else else
sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
dst4, tuple->ipv4.dport, dst4, tuple->ipv4.dport,
dif, sdif, &udp_table, skb); dif, sdif, &udp_table, NULL);
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
} else { } else {
struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
int sdif = inet6_sdif(skb);
if (proto == IPPROTO_TCP) if (proto == IPPROTO_TCP)
sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0, sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
src6, tuple->ipv6.sport, src6, tuple->ipv6.sport,
dst6, tuple->ipv6.dport, dst6, tuple->ipv6.dport,
dif, sdif, &refcounted); dif, sdif, &refcounted);
else else
sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport, sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport,
dst6, tuple->ipv6.dport, dst6, tuple->ipv6.dport,
dif, sdif, &udp_table, skb); dif, sdif, &udp_table, NULL);
#endif #endif
} }
if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) { if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
@@ -3513,35 +3513,60 @@ struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
* callers to satisfy BPF_CALL declarations. * callers to satisfy BPF_CALL declarations.
*/ */
static unsigned long static unsigned long
bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
u8 proto, u64 netns_id, u64 flags) struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
u64 flags)
{ {
struct net *caller_net;
struct sock *sk = NULL; struct sock *sk = NULL;
u8 family = AF_UNSPEC; u8 family = AF_UNSPEC;
struct net *net; struct net *net;
int sdif;
family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags)) if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags))
goto out; goto out;
if (skb->dev)
caller_net = dev_net(skb->dev); if (family == AF_INET)
sdif = inet_sdif(skb);
else else
caller_net = sock_net(skb->sk); sdif = inet6_sdif(skb);
if (netns_id) { if (netns_id) {
net = get_net_ns_by_id(caller_net, netns_id); net = get_net_ns_by_id(caller_net, netns_id);
if (unlikely(!net)) if (unlikely(!net))
goto out; goto out;
sk = sk_lookup(net, tuple, skb, family, proto); sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
put_net(net); put_net(net);
} else { } else {
net = caller_net; net = caller_net;
sk = sk_lookup(net, tuple, skb, family, proto); sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
} }
if (sk) if (sk)
sk = sk_to_full_sk(sk); sk = sk_to_full_sk(sk);
out: out:
return (unsigned long) sk; return (unsigned long) sk;
} }
/* skb-context entry point for the sk_lookup helpers: derives the caller's
 * netns and ingress ifindex from the skb, then defers to the common
 * __bpf_sk_lookup() path that is shared with the XDP hookpoint.
 */
static unsigned long
bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
u8 proto, u64 netns_id, u64 flags)
{
struct net *caller_net;
int ifindex;
if (skb->dev) {
/* Packet arrived on a device: take netns and ifindex from it. */
caller_net = dev_net(skb->dev);
ifindex = skb->dev->ifindex;
} else {
/* No device attached (e.g. locally generated skb): fall back to
 * the owning socket's netns; 0 means "no ingress ifindex".
 */
caller_net = sock_net(skb->sk);
ifindex = 0;
}
return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex,
proto, netns_id, flags);
}
BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb, BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{ {
@@ -3587,6 +3612,51 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.arg1_type = ARG_PTR_TO_SOCKET, .arg1_type = ARG_PTR_TO_SOCKET,
}; };
/* XDP-context variant of bpf_sk_lookup_udp(): there is no skb at this
 * hookpoint, so netns and ifindex come from the RX queue's device and a
 * NULL skb is passed to the common lookup path.
 * NOTE(review): netns_id is declared u32 here but u64 in the skb-based
 * variants, while __bpf_sk_lookup() takes u64 — confirm this asymmetry
 * is intended.
 */
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
{
struct net *caller_net = dev_net(ctx->rxq->dev);
int ifindex = ctx->rxq->dev->ifindex;
return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
IPPROTO_UDP, netns_id, flags);
}
static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
.func = bpf_xdp_sk_lookup_udp,
.gpl_only = false,
.pkt_access = true,
/* Verifier tracks the returned socket reference; caller must release
 * it via bpf_sk_release().
 */
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
/* XDP-context variant of bpf_sk_lookup_tcp(): mirrors the UDP helper
 * above, differing only in the IPPROTO_TCP protocol selector passed to
 * the shared __bpf_sk_lookup() path.
 */
BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
{
struct net *caller_net = dev_net(ctx->rxq->dev);
int ifindex = ctx->rxq->dev->ifindex;
return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
IPPROTO_TCP, netns_id, flags);
}
static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
.func = bpf_xdp_sk_lookup_tcp,
.gpl_only = false,
.pkt_access = true,
/* Returned socket is reference-tracked; must be paired with
 * bpf_sk_release() by the BPF program.
 */
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
#endif /* CONFIG_INET */
static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
unsigned long off, unsigned long len) unsigned long off, unsigned long len)
{ {
@@ -5342,6 +5412,14 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_l4_csum_replace_proto; return &bpf_l4_csum_replace_proto;
case BPF_FUNC_set_hash_invalid: case BPF_FUNC_set_hash_invalid:
return &bpf_set_hash_invalid_proto; return &bpf_set_hash_invalid_proto;
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_udp:
return &bpf_xdp_sk_lookup_udp_proto;
case BPF_FUNC_sk_lookup_tcp:
return &bpf_xdp_sk_lookup_tcp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
#endif
default: default:
return lwt_out_func_proto(func_id, prog); return lwt_out_func_proto(func_id, prog);
} }