diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8c424dbc9933..d90ac7456612 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -202,6 +202,7 @@ enum bpf_return_type { RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ + RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -257,6 +258,8 @@ enum bpf_reg_type { PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ PTR_TO_SOCK_COMMON, /* reg points to sock_common */ PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ + PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ + PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -968,4 +971,31 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, return 0; } #endif + +#ifdef CONFIG_INET +bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info); +u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size); +#else +static inline bool bpf_tcp_sock_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} + +static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + return 0; +} +#endif /* CONFIG_INET */ + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6500148fee0b..192e7bf054b1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2265,6 +2265,15 @@ union bpf_attr { * pointer that was returned from bpf_sk_lookup_xxx\ (). * Return * 0 on success, or a negative error in case of failure. + * + * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_tcp_sock** pointer from a + * **struct bpf_sock** pointer. + * + * Return + * A **struct bpf_tcp_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2569,6 +2578,45 @@ struct bpf_sock { */ }; +struct bpf_tcp_sock { + __u32 snd_cwnd; /* Sending congestion window */ + __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + __u32 rtt_min; + __u32 snd_ssthresh; /* Slow start size threshold */ + __u32 rcv_nxt; /* What we want to receive next */ + __u32 snd_nxt; /* Next sequence we send */ + __u32 snd_una; /* First byte we want an ack for */ + __u32 mss_cache; /* Cached effective mss, not including SACKS */ + __u32 ecn_flags; /* ECN status bits. */ + __u32 rate_delivered; /* saved rate sample: packets delivered */ + __u32 rate_interval_us; /* saved rate sample: time elapsed */ + __u32 packets_out; /* Packets which are "in flight" */ + __u32 retrans_out; /* Retransmitted packets out */ + __u32 total_retrans; /* Total retransmits for entire connection */ + __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. + */ + __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. + */ + __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ + __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ + __u32 lost_out; /* Lost packets */ + __u32 sacked_out; /* SACK'd packets */ + __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + * sum(delta(rcv_nxt)), or how many bytes + * were acked. + */ + __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked + * sum(delta(snd_una)), or how many bytes + * were acked. + */ +}; + struct bpf_sock_tuple { union { struct { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 836e9fe38cbc..87025ddd79c6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -277,14 +277,16 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type) static bool type_is_sk_pointer(enum bpf_reg_type type) { return type == PTR_TO_SOCKET || - type == PTR_TO_SOCK_COMMON; + type == PTR_TO_SOCK_COMMON || + type == PTR_TO_TCP_SOCK; } static bool reg_type_may_be_null(enum bpf_reg_type type) { return type == PTR_TO_MAP_VALUE_OR_NULL || type == PTR_TO_SOCKET_OR_NULL || - type == PTR_TO_SOCK_COMMON_OR_NULL; + type == PTR_TO_SOCK_COMMON_OR_NULL || + type == PTR_TO_TCP_SOCK_OR_NULL; } static bool type_is_refcounted(enum bpf_reg_type type) @@ -350,6 +352,8 @@ static const char * const reg_type_str[] = { [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", [PTR_TO_SOCK_COMMON] = "sock_common", [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", + [PTR_TO_TCP_SOCK] = "tcp_sock", + [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", }; static void print_liveness(struct bpf_verifier_env *env, @@ -1132,6 +1136,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: return true; default: return false; @@ -1576,6 +1582,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, case PTR_TO_SOCKET: valid = bpf_sock_is_valid_access(off, size, t, &info); break; + case PTR_TO_TCP_SOCK: + valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); + break; default: valid = false; } @@ -1726,6 +1735,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, case PTR_TO_SOCK_COMMON: pointer_desc = "sock_common "; break; + case PTR_TO_TCP_SOCK: + pointer_desc = "tcp_sock "; + break; default: break; } @@ -3093,6 +3105,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn /* For mark_ptr_or_null_reg() */ regs[BPF_REG_0].id = ++env->id_gen; } + } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; + regs[BPF_REG_0].id = ++env->id_gen; } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -3528,6 +3544,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; @@ -4610,6 +4628,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, reg->type = PTR_TO_SOCKET; } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { reg->type = PTR_TO_SOCK_COMMON; + } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { + reg->type = PTR_TO_TCP_SOCK; } if (is_null || !(reg_is_refcounted(reg) || @@ -5670,6 +5690,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: /* Only valid matches are exact, which memcmp() above * would have accepted */ @@ -5964,6 +5986,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: return false; default: return true; @@ -6726,6 +6750,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) case PTR_TO_SOCK_COMMON: convert_ctx_access = bpf_sock_convert_ctx_access; break; + case PTR_TO_TCP_SOCK: + convert_ctx_access = bpf_tcp_sock_convert_ctx_access; + break; default: continue; } diff --git a/net/core/filter.c b/net/core/filter.c index 63ceceb4ace2..a166f675e817 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3692,6 +3692,121 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { .arg5_type = ARG_ANYTHING, }; +bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked)) + return false; + if (off % size != 0) + return false; + switch (off) { + case offsetof(struct bpf_tcp_sock, bytes_received): + case offsetof(struct bpf_tcp_sock, bytes_acked): + return size == sizeof(__u64); + default: + return size == sizeof(__u32); + } +} + +#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \ +do { \ + switch (si->off) { \ + case offsetof(md_type, snd_cwnd): \ + CONVERT(snd_cwnd); break; \ + case offsetof(md_type, srtt_us): \ + CONVERT(srtt_us); break; \ + case offsetof(md_type, snd_ssthresh): \ + CONVERT(snd_ssthresh); break; \ + case offsetof(md_type, rcv_nxt): \ + CONVERT(rcv_nxt); break; \ + case offsetof(md_type, snd_nxt): \ + CONVERT(snd_nxt); break; \ + case offsetof(md_type, snd_una): \ + CONVERT(snd_una); break; \ + case offsetof(md_type, mss_cache): \ + CONVERT(mss_cache); break; \ + case offsetof(md_type, ecn_flags): \ + CONVERT(ecn_flags); break; \ + case offsetof(md_type, rate_delivered): \ + CONVERT(rate_delivered); break; \ + case offsetof(md_type, rate_interval_us): \ + CONVERT(rate_interval_us); break; \ + case offsetof(md_type, packets_out): \ + CONVERT(packets_out); break; \ + case offsetof(md_type, retrans_out): \ + CONVERT(retrans_out); break; \ + case offsetof(md_type, total_retrans): \ + CONVERT(total_retrans); break; \ + case offsetof(md_type, segs_in): \ + CONVERT(segs_in); break; \ + case offsetof(md_type, data_segs_in): \ + CONVERT(data_segs_in); break; \ + case offsetof(md_type, segs_out): \ + CONVERT(segs_out); break; \ + case offsetof(md_type, data_segs_out): \ + CONVERT(data_segs_out); break; \ + case offsetof(md_type, lost_out): \ + CONVERT(lost_out); break; \ + case offsetof(md_type, sacked_out): \ + CONVERT(sacked_out); break; \ + case offsetof(md_type, bytes_received): \ + CONVERT(bytes_received); break; \ + case offsetof(md_type, bytes_acked): \ + CONVERT(bytes_acked); break; \ + } \ +} while (0) + +u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + +#define BPF_TCP_SOCK_GET_COMMON(FIELD) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \ + FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\ + si->dst_reg, si->src_reg, \ + offsetof(struct tcp_sock, FIELD)); \ + } while (0) + CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock, + BPF_TCP_SOCK_GET_COMMON); + if (insn > insn_buf) + return insn - insn_buf; + + switch (si->off) { + case offsetof(struct bpf_tcp_sock, rtt_min): + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != + sizeof(struct minmax)); + BUILD_BUG_ON(sizeof(struct minmax) < + sizeof(struct minmax_sample)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct tcp_sock, rtt_min) + + offsetof(struct minmax_sample, v)); + break; + } + return insn - insn_buf; +} + +BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) +{ + sk = sk_to_full_sk(sk); + + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_tcp_sock_proto = { + .func = bpf_tcp_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, +}; + #endif /* CONFIG_INET */ static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, @@ -5182,6 +5297,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_local_storage_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; +#ifdef CONFIG_INET + case BPF_FUNC_tcp_sock: + return &bpf_tcp_sock_proto; +#endif default: return sk_filter_func_proto(func_id, prog); } @@ -5273,6 +5392,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_tcp_sock: + return &bpf_tcp_sock_proto; default: return bpf_base_func_proto(func_id); }