Compare commits
56 Commits
| Author | SHA1 | Date |
|---|---|---|
|  | 60900f18f0 |  |
|  | 38f5ab5930 |  |
|  | 5c9f4df943 |  |
|  | 5200aeec61 |  |
|  | 563dac970e |  |
|  | 01ba999ffa |  |
|  | 914857cb61 |  |
|  | 9dd1d18df4 |  |
|  | 6ebbed30eb |  |
|  | 8c11f21e93 |  |
|  | a37f02ec18 |  |
|  | ef98bc622b |  |
|  | 71b5f50e02 |  |
|  | 8820d200b8 |  |
|  | b82491e4b4 |  |
|  | 0d7c4a6dc9 |  |
|  | 39ffcc94ad |  |
|  | 24650d3d81 |  |
|  | 613ef16196 |  |
|  | e800a1193c |  |
|  | 2df8f1898b |  |
|  | 1b176bd2e7 |  |
|  | d2cbfd4dee |  |
|  | 918976b7b5 |  |
|  | ccbd492501 |  |
|  | c5bd40a8d2 |  |
|  | 22e42eed71 |  |
|  | 712d784704 |  |
|  | 4e80d0323f |  |
|  | 8072a74d24 |  |
|  | 73b141d7f4 |  |
|  | f43e033e8c |  |
|  | 69cec1fbd7 |  |
|  | ff79e1d1f0 |  |
|  | fb29039de2 |  |
|  | 8d6584fbf9 |  |
|  | ccac0effe5 |  |
|  | bafeeb36ce |  |
|  | a6fd4fa0e2 |  |
|  | eeb053602a |  |
|  | 427d36bab5 |  |
|  | 7f8339e1bf |  |
|  | 60ef26bc49 |  |
|  | 2061b38681 |  |
|  | e1ee1ed780 |  |
|  | 806b9915bd |  |
|  | 0dba23ab37 |  |
|  | 17923cb9ba |  |
|  | cb8d8dc693 |  |
|  | c9cd8002d8 |  |
|  | c6ed9aa242 |  |
|  | 51233da9ef |  |
|  | 986e2dd35f |  |
|  | 2db698c96a |  |
|  | d4bf99abcd |  |
|  | 8c93398cfe |  |
```diff
@@ -692,6 +692,7 @@
00 00 0a 0a
00 00 8a 8a];
qcom,platform-regulator-settings = [1d 1d 1d 1d 1d];
qcom,panel-allow-phy-poweroff;
qcom,phy-supply-entries {
#address-cells = <1>;
#size-cells = <0>;
```
```diff
@@ -1177,6 +1177,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}

if (!image || !prog->is_func || extra_pass) {
if (image)
bpf_prog_fill_jited_linfo(prog, addrs);
out_addrs:
kfree(addrs);
kfree(jit_data);
```
```diff
@@ -533,6 +533,22 @@ static struct tp_common_ops double_tap_ops = {
.show = double_tap_show,
.store = double_tap_store,
};

#ifdef CONFIG_TOUCHSCREEN_FOD
static ssize_t fp_state_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
if (!fts_info)
return -EINVAL;

return sprintf(buf, "%d,%d,%d\n", fts_info->fod_pressed_x, fts_info->fod_pressed_y,
fts_info->fod_pressed);
}

static struct tp_common_ops fp_state_ops = {
.show = fp_state_show,
};
#endif
#endif

#ifdef GRIP_MODE

@@ -4303,6 +4319,10 @@ static void fts_gesture_event_handler(struct fts_ts_info *info,
needCoords = 1;
#ifdef CONFIG_TOUCHSCREEN_FOD
if (event[2] == GEST_ID_LONG_PRESS) {
info->fod_pressed = true;
info->fod_pressed_x = x;
info->fod_pressed_y = y;
tp_common_notify_fp_state();
if (!info->fod_down &&
(info->fod_status == 1 || info->fod_status == 2)) {
MI_TOUCH_LOGI(1, "%s %s: FOD Down\n", tag,

@@ -4327,7 +4347,6 @@ static void fts_gesture_event_handler(struct fts_ts_info *info,
if ((info->sensor_sleep &&
!info->sleep_finger) ||
!info->sensor_sleep) {
info->fod_pressed = true;
input_report_key(info->input_dev,
BTN_INFO, 1);
input_sync(info->input_dev);

@@ -4422,6 +4441,9 @@ static void fts_gesture_event_handler(struct fts_ts_info *info,
info->sleep_finger = 0;
info->fod_overlap = 0;
info->fod_pressed = false;
info->fod_pressed_x = 0;
info->fod_pressed_y = 0;
tp_common_notify_fp_state();
goto gesture_done;
}
#endif

@@ -8241,6 +8263,7 @@ static int fts_probe(struct spi_device *client)
tp_common_set_double_tap_ops(&double_tap_ops);
#ifdef CONFIG_TOUCHSCREEN_FOD
tp_common_set_fod_status_ops(&fod_status_ops);
tp_common_set_fp_state_ops(&fp_state_ops);
#endif
#endif
```
```diff
@@ -387,6 +387,8 @@ struct fts_ts_info {
unsigned long fod_id;
unsigned long fod_x;
unsigned long fod_y;
unsigned long fod_pressed_x;
unsigned long fod_pressed_y;
struct mutex fod_mutex;
struct mutex cmd_update_mutex;
bool fod_coordinate_update;
```
```diff
@@ -13,10 +13,18 @@ struct kobject *touchpanel_kobj;
return sysfs_create_file(touchpanel_kobj, &kattr.attr); \
}

#define TS_ENABLE_NOTIFY(type) \
void tp_common_notify_##type(void) \
{ \
sysfs_notify(touchpanel_kobj, NULL, __stringify(type)); \
}

TS_ENABLE_FOPS(capacitive_keys)
TS_ENABLE_FOPS(double_tap)
TS_ENABLE_FOPS(fod_status)
TS_ENABLE_FOPS(fp_state)
TS_ENABLE_FOPS(reversed_keys)
TS_ENABLE_NOTIFY(fp_state)

static int __init tp_common_init(void)
{
```
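A minimal, userspace-compilable sketch of the token-pasting and stringify pattern that the new `TS_ENABLE_NOTIFY()` macro in the hunk above relies on. The kernel version calls `sysfs_notify()` on `touchpanel_kobj`; the `printf()` here is only a stand-in so the sketch can run on its own.

```c
#include <stdio.h>

#define __stringify_1(x) #x
#define __stringify(x)   __stringify_1(x)

/* Same shape as the driver macro: generates tp_common_notify_<type>(). */
#define TS_ENABLE_NOTIFY(type)                                              \
	void tp_common_notify_##type(void)                                  \
	{                                                                   \
		/* kernel: sysfs_notify(touchpanel_kobj, NULL, __stringify(type)); */ \
		printf("notify sysfs attribute \"%s\"\n", __stringify(type)); \
	}

TS_ENABLE_NOTIFY(fp_state) /* expands to tp_common_notify_fp_state() */

int main(void)
{
	tp_common_notify_fp_state();
	return 0;
}
```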
```diff
@@ -387,10 +387,7 @@ static bool pd_get_bms_digest_verified(struct usbpd_pm *pdpm)

pr_err("pval.intval: %d\n", pval.intval);

-if (pval.intval == 1)
-return true;
-else
-return false;
+return true;
}

/* get bq27z561 chip ok*/
```
fs/nsfs.c (46 changed lines)
```diff
@@ -51,7 +51,7 @@ static void nsfs_evict(struct inode *inode)
ns->ops->put(ns);
}

static void *__ns_get_path(struct path *path, struct ns_common *ns)
static int __ns_get_path(struct path *path, struct ns_common *ns)
{
struct vfsmount *mnt = nsfs_mnt;
struct dentry *dentry;

@@ -70,13 +70,13 @@ static void *__ns_get_path(struct path *path, struct ns_common *ns)
got_it:
path->mnt = mntget(mnt);
path->dentry = dentry;
return NULL;
return 0;
slow:
rcu_read_unlock();
inode = new_inode_pseudo(mnt->mnt_sb);
if (!inode) {
ns->ops->put(ns);
return ERR_PTR(-ENOMEM);
return -ENOMEM;
}
inode->i_ino = ns->inum;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);

@@ -88,7 +88,7 @@ static void *__ns_get_path(struct path *path, struct ns_common *ns)
dentry = d_alloc_anon(mnt->mnt_sb);
if (!dentry) {
iput(inode);
return ERR_PTR(-ENOMEM);
return -ENOMEM;
}
d_instantiate(dentry, inode);
dentry->d_fsdata = (void *)ns->ops;

@@ -97,25 +97,22 @@ static void *__ns_get_path(struct path *path, struct ns_common *ns)
d_delete(dentry); /* make sure ->d_prune() does nothing */
dput(dentry);
cpu_relax();
return ERR_PTR(-EAGAIN);
return -EAGAIN;
}
goto got_it;
}

void *ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb,
int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb,
void *private_data)
{
struct ns_common *ns;
void *ret;
int ret;

again:
ns = ns_get_cb(private_data);
if (!ns)
return ERR_PTR(-ENOENT);

ret = __ns_get_path(path, ns);
if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN)
goto again;
do {
struct ns_common *ns = ns_get_cb(private_data);
if (!ns)
return -ENOENT;
ret = __ns_get_path(path, ns);
} while (ret == -EAGAIN);
return ret;
}

@@ -130,8 +127,7 @@ static struct ns_common *ns_get_path_task(void *private_data)

return args->ns_ops->get(args->task);
}

void *ns_get_path(struct path *path, struct task_struct *task,
int ns_get_path(struct path *path, struct task_struct *task,
const struct proc_ns_operations *ns_ops)
{
struct ns_get_path_task_args args = {

@@ -147,14 +143,14 @@ int open_related_ns(struct ns_common *ns,
{
struct path path = {};
struct file *f;
void *err;
int err;
int fd;

fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0)
return fd;

while (1) {
do {
struct ns_common *relative;

relative = get_ns(ns);

@@ -164,13 +160,11 @@ int open_related_ns(struct ns_common *ns,
}

err = __ns_get_path(&path, relative);
if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN)
continue;
break;
}
if (IS_ERR(err)) {
} while (err == -EAGAIN);

if (err) {
put_unused_fd(fd);
return PTR_ERR(err);
return err;
}

f = dentry_open(&path, O_RDONLY, current_cred());
```
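A small, userspace-compilable sketch of the error-handling shift in the fs/nsfs.c hunks above: callers stop decoding an ERR_PTR()-encoded void pointer and instead branch on a plain negative errno, retrying on -EAGAIN just as the new do/while loop does. The tiny ERR_PTR/PTR_ERR/IS_ERR helpers are simplified stand-ins for the kernel versions.

```c
#include <stdio.h>
#include <errno.h>

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr) { return (unsigned long)ptr >= (unsigned long)-4095; }

/* Old style: status squeezed into a pointer return. */
static void *get_path_old(int fail) { return fail ? ERR_PTR(-EAGAIN) : NULL; }
/* New style (as in the patched __ns_get_path): plain int, 0 on success. */
static int get_path_new(int fail) { return fail ? -EAGAIN : 0; }

int main(void)
{
	void *old_ret = get_path_old(1);
	if (IS_ERR(old_ret) && PTR_ERR(old_ret) == -EAGAIN)
		printf("old caller: retry\n");

	int ret;
	do {
		ret = get_path_new(0);  /* retry loop mirrors the new do/while */
	} while (ret == -EAGAIN);
	printf("new caller: ret = %d\n", ret);
	return 0;
}
```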
```diff
@@ -42,14 +42,14 @@ static const char *proc_ns_get_link(struct dentry *dentry,
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
struct task_struct *task;
struct path ns_path;
void *error = ERR_PTR(-EACCES);
int error = -EACCES;

if (!dentry)
return ERR_PTR(-ECHILD);

task = get_proc_task(inode);
if (!task)
return error;
return ERR_PTR(-EACCES);

if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
error = ns_get_path(&ns_path, task, ns_ops);

@@ -57,7 +57,7 @@ static const char *proc_ns_get_link(struct dentry *dentry,
nd_jump_link(&ns_path);
}
put_task_struct(task);
return error;
return ERR_PTR(error);
}

static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
```
```diff
@@ -13,6 +13,7 @@
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
#include <linux/kmemleak.h>
#include "internal.h"

@@ -593,6 +594,10 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
if (!table->proc_handler)
goto out;

error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write);
if (error)
goto out;

/* careful: calling conventions are nasty here */
res = count;
error = table->proc_handler(table, write, buf, &res, ppos);
```
```diff
@@ -2,6 +2,7 @@
#ifndef _BPF_CGROUP_H
#define _BPF_CGROUP_H

#include <linux/bpf.h>
#include <linux/errno.h>
#include <linux/jump_label.h>
#include <linux/percpu.h>

@@ -17,12 +18,16 @@ struct bpf_prog;
struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;

struct ctl_table;
struct ctl_table_header;
#ifdef CONFIG_CGROUP_BPF

extern struct static_key_false cgroup_bpf_enabled_key;
#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)

DECLARE_PER_CPU(void*, bpf_cgroup_storage);
DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
#define for_each_cgroup_storage_type(stype) \
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)

struct bpf_cgroup_storage_map;

@@ -32,7 +37,10 @@ struct bpf_storage_buffer {
};

struct bpf_cgroup_storage {
struct bpf_storage_buffer *buf;
union {
struct bpf_storage_buffer *buf;
void __percpu *percpu_buf;
};
struct bpf_cgroup_storage_map *map;
struct bpf_cgroup_storage_key key;
struct list_head list;

@@ -43,7 +51,7 @@ struct bpf_cgroup_storage {
struct bpf_prog_list {
struct list_head node;
struct bpf_prog *prog;
struct bpf_cgroup_storage *storage;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};

struct bpf_prog_array;

@@ -86,6 +94,14 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
enum bpf_attach_type type);

int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
int *optname, char __user *optval,
int *optlen, char **kernel_optval);
int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
int optname, char __user *optval,
int __user *optlen, int max_optlen,
int retval);

int __cgroup_bpf_run_filter_sk(struct sock *sk,
enum bpf_attach_type type);

@@ -101,18 +117,29 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
short access, enum bpf_attach_type type);

static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
struct bpf_map *map)
{
if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
return BPF_CGROUP_STORAGE_PERCPU;

return BPF_CGROUP_STORAGE_SHARED;
}
static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
*storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{
enum bpf_cgroup_storage_type stype;
struct bpf_storage_buffer *buf;

if (!storage)
return;

buf = READ_ONCE(storage->buf);
this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
for_each_cgroup_storage_type(stype) {
if (!storage[stype])
continue;
buf = READ_ONCE(storage[stype]->buf);
this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]);
}
}

struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
enum bpf_cgroup_storage_type stype);
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
struct cgroup *cgroup,

@@ -121,6 +148,13 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);

int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
enum bpf_attach_type type);
int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
void *value, u64 flags);

/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \

@@ -245,6 +279,47 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr,
enum bpf_prog_type ptype);
int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);

#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
BPF_CGROUP_SYSCTL); \
__ret; \
})

#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
kernel_optval) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
optname, optval, \
optlen, \
kernel_optval); \
__ret; \
})
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
get_user(__ret, optlen); \
__ret; \
})
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \
max_optlen, retval) \
({ \
int __ret = retval; \
if (cgroup_bpf_enabled) \
__ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \
optname, optval, \
optlen, max_optlen, \
retval); \
__ret; \
})


#else

struct bpf_prog;

@@ -271,17 +346,58 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL;
}

static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
kernel_optval) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
optname, optval, \
optlen, \
kernel_optval); \
__ret; \
})
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
get_user(__ret, optlen); \
__ret; \
})
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \
max_optlen, retval) \
({ \
int __ret = retval; \
if (cgroup_bpf_enabled) \
__ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \
optname, optval, \
optlen, max_optlen, \
retval); \
__ret; \
})

static inline void bpf_cgroup_storage_set(
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
struct bpf_map *map) { return 0; }
static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
struct bpf_map *map) {}
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
struct bpf_prog *prog) { return 0; }
struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
static inline void bpf_cgroup_storage_free(
struct bpf_cgroup_storage *storage) {}

#define cgroup_bpf_enabled (0)

static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
void *value) {
return 0;
}
static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
void *key, void *value, u64 flags) {
return 0;
}

#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })

@@ -300,6 +416,14 @@ static inline void bpf_cgroup_storage_free(
#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; })
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
optlen, max_optlen, retval) ({ retval; })
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
kernel_optval) ({ 0; })

#define for_each_cgroup_storage_type(stype) for (; false; )

#endif /* CONFIG_CGROUP_BPF */
```
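An illustrative, GCC/Clang-compilable sketch of the guard pattern used by the BPF_CGROUP_RUN_PROG_* wrappers in the hunks above: a statement expression that only calls the hook when an "enabled" key is set, otherwise evaluating to 0. `cgroup_bpf_enabled` and `__cgroup_bpf_run_filter_sysctl()` are mocked here so the snippet stands alone; the kernel versions use a static branch and the real filter.

```c
#include <stdio.h>

#define BPF_CGROUP_SYSCTL 0

static int cgroup_bpf_enabled = 1;  /* stands in for static_branch_unlikely(&cgroup_bpf_enabled_key) */

static int __cgroup_bpf_run_filter_sysctl(void *head, void *table, int write, int type)
{
	(void)head; (void)table; (void)type;
	printf("sysctl hook ran (write=%d)\n", write);
	return 0;   /* 0 lets the sysctl proceed, non-zero would reject it */
}

/* Same shape as the kernel macro: evaluates to 0 when the key is off. */
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write)                      \
	({                                                                  \
		int __ret = 0;                                              \
		if (cgroup_bpf_enabled)                                     \
			__ret = __cgroup_bpf_run_filter_sysctl(head, table, \
							       write,       \
							       BPF_CGROUP_SYSCTL); \
		__ret;                                                      \
	})

int main(void)
{
	int error = BPF_CGROUP_RUN_PROG_SYSCTL(NULL, NULL, 1);
	printf("error = %d\n", error);
	return 0;
}
```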
```diff
@@ -23,6 +23,7 @@ struct bpf_prog;
struct bpf_map;
struct sock;
struct seq_file;
struct btf;
struct btf_type;

/* map is generic key/value storage optionally accesible by eBPF programs */

@@ -54,8 +55,15 @@ struct bpf_map_ops {
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
int (*map_check_btf)(const struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);

/* Direct value access helpers. */
int (*map_direct_value_addr)(const struct bpf_map *map,
u64 *imm, u32 off);
int (*map_direct_value_meta)(const struct bpf_map *map,
u64 imm, u32 *off);
};

struct bpf_map {

@@ -72,14 +80,15 @@ struct bpf_map {
u32 value_size;
u32 max_entries;
u32 map_flags;
u32 pages;
int spin_lock_off; /* >=0 valid offset, <0 error */
u32 id;
int numa_node;
u32 btf_key_type_id;
u32 btf_value_type_id;
struct btf *btf;
u32 pages;
bool unpriv_array;
/* 55 bytes hole */
/* 51 bytes hole */

/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.

@@ -92,6 +101,36 @@ struct bpf_map {
};

struct bpf_offload_dev;
static inline bool map_value_has_spin_lock(const struct bpf_map *map)
{
return map->spin_lock_off >= 0;
}

static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
{
if (likely(!map_value_has_spin_lock(map)))
return;
*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
(struct bpf_spin_lock){};
}

/* copy everything but bpf_spin_lock */
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
if (unlikely(map_value_has_spin_lock(map))) {
u32 off = map->spin_lock_off;
memcpy(dst, src, off);
memcpy(dst + off + sizeof(struct bpf_spin_lock),
src + off + sizeof(struct bpf_spin_lock),
map->value_size - off - sizeof(struct bpf_spin_lock));
} else {
memcpy(dst, src, map->value_size);
}
}

void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);

struct bpf_offloaded_map;

struct bpf_map_dev_ops {

@@ -128,6 +167,7 @@ static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
}

int map_check_no_btf(const struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);

@@ -143,6 +183,8 @@ enum bpf_arg_type {
ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */
ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */
ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */

/* the following constraints used to prototype bpf_memcmp() and other
* functions that access data on eBPF program stack

@@ -159,6 +201,11 @@ enum bpf_arg_type {

ARG_PTR_TO_CTX, /* pointer to context */
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
ARG_PTR_TO_INT, /* pointer to int */
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
};

/* type of values returned from helper functions */

@@ -167,6 +214,9 @@ enum bpf_return_type {
RET_VOID, /* function doesn't return anything */
RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
};

/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs

@@ -217,6 +267,14 @@ enum bpf_reg_type {
PTR_TO_PACKET_META, /* skb->data - meta_len */
PTR_TO_PACKET, /* reg points to skb->data */
PTR_TO_PACKET_END, /* skb->data + headlen */
PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
PTR_TO_SOCKET, /* reg points to struct bpf_sock */
PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
PTR_TO_SOCK_COMMON, /* reg points to sock_common */
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
};

/* The information passed from prog-specific *_is_valid_access

@@ -276,14 +334,23 @@ struct bpf_prog_offload {
u32 jited_len;
};

enum bpf_cgroup_storage_type {
BPF_CGROUP_STORAGE_SHARED,
BPF_CGROUP_STORAGE_PERCPU,
__BPF_CGROUP_STORAGE_MAX
};
#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX

struct bpf_prog_aux {
atomic_t refcnt;
u32 used_map_cnt;
u32 max_ctx_offset;
u32 max_tp_access;
u32 stack_depth;
u32 id;
u32 func_cnt;
bool offload_requested;
u32 func_cnt; /* used by non-func prog as the number of func progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
struct bpf_prog **func;
void *jit_data; /* JIT specific data. arch dependent */
struct latch_tree_node ksym_tnode;

@@ -293,12 +360,36 @@ struct bpf_prog_aux {
struct bpf_prog *prog;
struct user_struct *user;
u64 load_time; /* ns since boottime */
struct bpf_map *cgroup_storage;
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
char name[BPF_OBJ_NAME_LEN];
#ifdef CONFIG_SECURITY
void *security;
#endif
struct bpf_prog_offload *offload;
struct btf *btf;
struct bpf_func_info *func_info;
/* bpf_line_info loaded from userspace. linfo->insn_off
* has the xlated insn offset.
* Both the main and sub prog share the same linfo.
* The subprog can access its first linfo by
* using the linfo_idx.
*/
struct bpf_line_info *linfo;
/* jited_linfo is the jited addr of the linfo. It has a
* one to one mapping to linfo:
* jited_linfo[i] is the jited addr for the linfo[i]->insn_off.
* Both the main and sub prog share the same jited_linfo.
* The subprog can access its first jited_linfo by
* using the linfo_idx.
*/
void **jited_linfo;
u32 func_info_cnt;
u32 nr_linfo;
/* subprog can use linfo_idx to access its first linfo and
* jited_linfo.
* main prog always has linfo_idx == 0
*/
u32 linfo_idx;
union {
struct work_struct work;
struct rcu_head rcu;

@@ -339,6 +430,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void);

typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
unsigned long off, unsigned long len);
typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
const struct bpf_insn *src,
struct bpf_insn *dst,
struct bpf_prog *prog,
u32 *target_size);

u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);

@@ -362,7 +458,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
*/
struct bpf_prog_array_item {
struct bpf_prog *prog;
struct bpf_cgroup_storage *cgroup_storage;
struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};

struct bpf_prog_array {

@@ -373,6 +469,7 @@ struct bpf_prog_array {
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
bool bpf_prog_array_is_empty(struct bpf_prog_array *array);
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
__u32 __user *prog_ids, u32 cnt);

@@ -506,14 +603,18 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
}

/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr,
union bpf_attr __user *uattr);
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
#endif

/* Map specifics */
struct xdp_buff;
struct sk_buff;

struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,

@@ -600,6 +701,12 @@ static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map,
return NULL;
}

static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map,
u32 key)
{
return NULL;
}

static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index)
{
}

@@ -737,33 +844,18 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
int sockmap_get_from_fd(const union bpf_attr *attr, int type,
struct bpf_prog *prog);
#if defined(CONFIG_BPF_STREAM_PARSER)
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which);
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
#else
static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
{
return NULL;
}

static inline struct sock *__sock_hash_lookup_elem(struct bpf_map *map,
void *key)
{
return NULL;
}

static inline int sock_map_prog(struct bpf_map *map,
struct bpf_prog *prog,
u32 type)
static inline int sock_map_prog_update(struct bpf_map *map,
struct bpf_prog *prog, u32 which)
{
return -EOPNOTSUPP;
}

static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type,
struct bpf_prog *prog)
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
struct bpf_prog *prog)
{
return -EINVAL;
}

@@ -840,11 +932,77 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;

extern const struct bpf_func_proto bpf_spin_lock_proto;
extern const struct bpf_func_proto bpf_spin_unlock_proto;
extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
extern const struct bpf_func_proto bpf_get_local_storage_proto;
extern const struct bpf_func_proto bpf_tcp_sock_proto;

/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

#if defined(CONFIG_NET)
bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info);
bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info);
u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
#else
static inline bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
return false;
}
static inline bool bpf_sock_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
return false;
}
static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size)
{
return 0;
}
#endif

#ifdef CONFIG_INET
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info);
u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
#else
static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
return false;
}

static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size)
{
return 0;
}
#endif /* CONFIG_INET */

#endif /* _LINUX_BPF_H */
```
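A userspace toy illustrating the copy_map_value() idea from the hunk above: when a map value embeds a bpf_spin_lock, the copy skips the lock's bytes so the destination's lock state is never overwritten. The struct layout and helper below are stand-ins for the demo, not the kernel definitions.

```c
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>

struct toy_spin_lock { uint32_t val; };

struct toy_map_value {
	uint32_t counter;
	struct toy_spin_lock lock;   /* must stay intact in dst */
	uint32_t flags;
};

static void copy_skipping_lock(void *dst, const void *src, size_t value_size,
			       size_t lock_off, size_t lock_sz)
{
	memcpy(dst, src, lock_off);                       /* bytes before the lock */
	memcpy((char *)dst + lock_off + lock_sz,          /* bytes after the lock */
	       (const char *)src + lock_off + lock_sz,
	       value_size - lock_off - lock_sz);
}

int main(void)
{
	struct toy_map_value src = { .counter = 42, .lock = { 7 }, .flags = 3 };
	struct toy_map_value dst = { .lock = { 99 } };

	copy_skipping_lock(&dst, &src, sizeof(dst),
			   offsetof(struct toy_map_value, lock),
			   sizeof(struct toy_spin_lock));
	/* counter/flags are copied, dst.lock.val is still 99 */
	printf("counter=%u lock=%u flags=%u\n", dst.counter, dst.lock.val, dst.flags);
	return 0;
}
```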
```diff
@@ -22,9 +22,12 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt)
#endif
#ifdef CONFIG_BPF_LIRC_MODE2
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)

@@ -33,6 +36,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
#endif

BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)

BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops)

@@ -42,6 +47,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)

@@ -55,6 +61,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
#ifdef CONFIG_NET
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
```
```diff
@@ -61,6 +61,8 @@ struct bpf_reg_state {
* offset, so they can share range knowledge.
* For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
* came from, when one is tested for != NULL.
* For PTR_TO_SOCKET this is used to share which pointers retain the
* same reference to the socket, to determine proper reference freeing.
*/
u32 id;
/* For scalar types (SCALAR_VALUE), this represents our knowledge of

@@ -104,6 +106,16 @@ struct bpf_stack_state {
struct bpf_reg_state spilled_ptr;
u8 slot_type[BPF_REG_SIZE];
};
struct bpf_reference_state {
/* Track each reference created with a unique id, even if the same
* instruction creates the reference multiple times (eg, via CALL).
*/
int id;
/* Instruction where the allocation of this reference occurred. This
* is used purely to inform the user of a reference leak.
*/
int insn_idx;
};

/* state of the program:
* type of all registers and stack info

@@ -121,8 +133,9 @@ struct bpf_func_state {
* zero == main subprog
*/
u32 subprogno;

/* should be second to last. See copy_func_state() */
/* The following fields should be last. See copy_func_state() */
int acquired_refs;
struct bpf_reference_state *refs;
int allocated_stack;
struct bpf_stack_state *stack;
};

@@ -138,10 +151,23 @@ struct bpf_id_pair {
struct bpf_verifier_state {
/* call stack tracking */
struct bpf_func_state *frame[MAX_CALL_FRAMES];
struct bpf_verifier_state *parent;
u32 curframe;
u32 active_spin_lock;
bool speculative;
};

#define bpf_get_spilled_reg(slot, frame) \
(((slot < frame->allocated_stack / BPF_REG_SIZE) && \
(frame->stack[slot].slot_type[0] == STACK_SPILL)) \
? &frame->stack[slot].spilled_ptr : NULL)

/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
#define bpf_for_each_spilled_reg(iter, frame, reg) \
for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \
iter < frame->allocated_stack / BPF_REG_SIZE; \
iter++, reg = bpf_get_spilled_reg(iter, frame))

/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
struct bpf_verifier_state state;

@@ -163,6 +189,10 @@ struct bpf_insn_aux_data {
unsigned long map_state; /* pointer/poison value for maps */
s32 call_imm; /* saved imm field of call insn */
u32 alu_limit; /* limit for add/sub register with pointer */
struct {
u32 map_index; /* index into used_maps[] */
u32 map_off; /* offset from value base address */
};
};
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
bool seen; /* this insn was processed by the verifier */

@@ -196,6 +226,7 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)

struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
u16 stack_depth; /* max. stack depth used by this function */
};

@@ -230,15 +261,28 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...);

static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *cur = env->cur_state;

return cur->frame[cur->curframe]->regs;
return cur->frame[cur->curframe];
}

static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
{
return cur_func(env)->regs;
}

int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx);

#include <linux/vmalloc.h>
#include <linux/mm.h>
static inline void *__compat_kvcalloc(size_t n, size_t size, gfp_t flags)
{
return kvmalloc_array(n, size, flags | __GFP_ZERO);
}
#define kvcalloc __compat_kvcalloc

#endif /* _LINUX_BPF_VERIFIER_H */
```
```diff
@@ -7,6 +7,7 @@
#include <linux/types.h>

struct btf;
struct btf_member;
struct btf_type;
union bpf_attr;

@@ -46,5 +47,27 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
struct seq_file *m);
int btf_get_fd_by_id(u32 id);
u32 btf_id(const struct btf *btf);
bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);

int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);

#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
{
return NULL;
}

static inline const char *btf_name_by_offset(const struct btf *btf,
u32 offset)
{
return NULL;
}
#endif

#endif
```
```diff
@@ -33,6 +33,8 @@ struct bpf_prog_aux;
struct xdp_rxq_info;
struct xdp_buff;
struct sock_reuseport;
struct ctl_table;
struct ctl_table_header;

/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function

@@ -485,6 +487,14 @@ struct sock_reuseport;
#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \
offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1

#if BITS_PER_LONG == 64
# define bpf_ctx_range_ptr(TYPE, MEMBER) \
offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
#else
# define bpf_ctx_range_ptr(TYPE, MEMBER) \
offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1
#endif /* BITS_PER_LONG == 64 */

#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \
({ \
BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \

@@ -615,23 +625,6 @@ struct bpf_skb_data_end {
void *data_end;
};

struct sk_msg_buff {
void *data;
void *data_end;
__u32 apply_bytes;
__u32 cork_bytes;
int sg_copybreak;
int sg_start;
int sg_curr;
int sg_end;
struct scatterlist sg_data[MAX_SKB_FRAGS];
bool sg_copy[MAX_SKB_FRAGS];
__u32 flags;
struct sock *sk_redir;
struct sock *sk;
struct sk_buff *skb;
struct list_head list;
};

struct bpf_redirect_info {
u32 ifindex;

@@ -811,6 +804,13 @@ void bpf_prog_free(struct bpf_prog *fp);

bool bpf_opcode_in_insntable(u8 code);

void bpf_prog_free_linfo(struct bpf_prog *prog);
void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
const u32 *insn_to_jit_off);
int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog);
void bpf_prog_free_jited_linfo(struct bpf_prog *prog);
void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog);

struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags);

@@ -915,8 +915,6 @@ void xdp_do_flush_map(void);

void bpf_warn_invalid_xdp_action(u32 act);

struct sock *do_sk_redirect_map(struct sk_buff *skb);
struct sock *do_msg_redirect_map(struct sk_msg_buff *md);

#ifdef CONFIG_INET
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,

@@ -1199,4 +1197,20 @@ struct bpf_sock_ops_kern {
*/
};

struct bpf_sysctl_kern {
struct ctl_table_header *head;
struct ctl_table *table;
int write;
};

struct bpf_sockopt_kern {
struct sock *sk;
u8 *optval;
u8 *optval_end;
s32 level;
s32 optname;
s32 optlen;
s32 retval;
};

#endif /* __LINUX_FILTER_H__ */
```
```diff
@@ -10,7 +10,9 @@ struct tp_common_ops {
const char *buf, size_t count);
};

void tp_common_notify_fp_state(void);
int tp_common_set_capacitive_keys_ops(struct tp_common_ops *ops);
int tp_common_set_double_tap_ops(struct tp_common_ops *ops);
int tp_common_set_fod_status_ops(struct tp_common_ops *ops);
int tp_common_set_fp_state_ops(struct tp_common_ops *ops);
int tp_common_set_reversed_keys_ops(struct tp_common_ops *ops);
```
```diff
@@ -76,10 +76,10 @@ static inline int ns_alloc_inum(struct ns_common *ns)

extern struct file *proc_ns_fget(int fd);
#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
extern void *ns_get_path(struct path *path, struct task_struct *task,
extern int ns_get_path(struct path *path, struct task_struct *task,
const struct proc_ns_operations *ns_ops);
typedef struct ns_common *ns_get_path_helper_t(void *);
extern void *ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
void *private_data);

extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
```
```diff
@@ -243,6 +243,9 @@ struct scatterlist;
struct pipe_inode_info;
struct iov_iter;
struct napi_struct;
struct bpf_prog;
union bpf_attr;
struct skb_ext;

#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack {

@@ -630,6 +633,7 @@ typedef unsigned char *sk_buff_data_t;
* @queue_mapping: Queue mapping for multiqueue devices
* @xmit_more: More SKBs are pending for this queue
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
* @active_extensions: active extensions (skb_ext_id types)
* @ndisc_nodetype: router type (from link layer)
* @ooo_okay: allow the mapping of a socket to a queue to be changed
* @l4_hash: indicate hash is a canonical 4-tuple hash over transport

@@ -659,6 +663,7 @@ typedef unsigned char *sk_buff_data_t;
* @data: Data head pointer
* @truesize: Buffer size
* @users: User count - see {datagram,tcp}.c
* @extensions: allocated extensions, valid if active_extensions is nonzero
*/

struct sk_buff {

@@ -739,7 +744,9 @@ struct sk_buff {
head_frag:1,
xmit_more:1,
pfmemalloc:1;

#ifdef CONFIG_SKB_EXTENSIONS
__u8 active_extensions;
#endif
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/

@@ -856,6 +863,10 @@ struct sk_buff {
*data;
unsigned int truesize;
refcount_t users;
#ifdef CONFIG_SKB_EXTENSIONS
/* only useable after checking ->active_extensions != 0 */
struct skb_ext *extensions;
#endif
};

#ifdef __KERNEL__

@@ -1192,6 +1203,10 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
const struct flow_dissector_key *key,
unsigned int key_count);

int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
struct bpf_prog *prog);
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);

bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,

@@ -3903,6 +3918,100 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
atomic_inc(&nfct->use);
}
#endif

#ifdef CONFIG_SKB_EXTENSIONS
enum skb_ext_id {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
SKB_EXT_BRIDGE_NF,
#endif
SKB_EXT_NUM, /* must be last */
};

/**
* struct skb_ext - sk_buff extensions
* @refcnt: 1 on allocation, deallocated on 0
* @offset: offset to add to @data to obtain extension address
* @chunks: size currently allocated, stored in SKB_EXT_ALIGN_SHIFT units
* @data: start of extension data, variable sized
*
* Note: offsets/lengths are stored in chunks of 8 bytes, this allows
* to use 'u8' types while allowing up to 2kb worth of extension data.
*/
struct skb_ext {
refcount_t refcnt;
u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */
u8 chunks; /* same */
char data[0] __aligned(8);
};

void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_put(struct skb_ext *ext);
static inline void skb_ext_put(struct sk_buff *skb)
{
if (skb->active_extensions)
__skb_ext_put(skb->extensions);
}

static inline void skb_ext_get(struct sk_buff *skb)
{
if (skb->active_extensions) {
struct skb_ext *ext = skb->extensions;
if (ext)
refcount_inc(&ext->refcnt);
}
}

static inline void __skb_ext_copy(struct sk_buff *dst,
const struct sk_buff *src)
{
dst->active_extensions = src->active_extensions;
if (src->active_extensions) {
struct skb_ext *ext = src->extensions;
refcount_inc(&ext->refcnt);
dst->extensions = ext;
}
}

static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src)
{
skb_ext_put(dst);
__skb_ext_copy(dst, src);
}

static inline bool __skb_ext_exist(const struct skb_ext *ext, enum skb_ext_id i)
{
return !!ext->offset[i];
}

static inline bool skb_ext_exist(const struct sk_buff *skb, enum skb_ext_id id)
{
return skb->active_extensions & (1 << id);
}

static inline void skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
{
if (skb_ext_exist(skb, id))
__skb_ext_del(skb, id);
}

static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id)
{
if (skb_ext_exist(skb, id)) {
struct skb_ext *ext = skb->extensions;
return (void *)ext + (ext->offset[id] << 3);
}
return NULL;
}

#else
static inline void skb_ext_put(struct sk_buff *skb) {}
static inline void skb_ext_get(struct sk_buff *skb) {}
static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {}
static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {}
#endif /* CONFIG_SKB_EXTENSIONS */

#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{

@@ -3988,12 +4097,19 @@ static inline void skb_init_secmark(struct sk_buff *skb)
{ }
#endif

static inline int secpath_exists(const struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
return skb->sp != NULL;
#else
return 0;
#endif
}

static inline bool skb_irq_freeable(const struct sk_buff *skb)
{
return !skb->destructor &&
#if IS_ENABLED(CONFIG_XFRM)
!skb->sp &&
#endif
!secpath_exists(skb) &&
!skb_nfct(skb) &&
!skb->_skb_refdst &&
!skb_has_frag_list(skb);
```
include/linux/skmsg.h (new file, 371 lines)
```diff
@@ -0,0 +1,371 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */

#ifndef _LINUX_SKMSG_H
#define _LINUX_SKMSG_H

#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/scatterlist.h>
#include <linux/skbuff.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/strparser.h>

#define MAX_MSG_FRAGS MAX_SKB_FRAGS

enum __sk_action {
__SK_DROP = 0,
__SK_PASS,
__SK_REDIRECT,
__SK_NONE,
};

struct sk_msg_sg {
u32 start;
u32 curr;
u32 end;
u32 size;
u32 copybreak;
bool copy[MAX_MSG_FRAGS];
struct scatterlist data[MAX_MSG_FRAGS];
};

struct sk_msg {
struct sk_msg_sg sg;
void *data;
void *data_end;
u32 apply_bytes;
u32 cork_bytes;
u32 flags;
struct sk_buff *skb;
struct sock *sk_redir;
struct sock *sk;
struct list_head list;
};

struct sk_psock_progs {
struct bpf_prog *msg_parser;
struct bpf_prog *skb_parser;
struct bpf_prog *skb_verdict;
};

enum sk_psock_state_bits {
SK_PSOCK_TX_ENABLED,
};

struct sk_psock_link {
struct list_head list;
struct bpf_map *map;
void *link_raw;
};

struct sk_psock_parser {
struct strparser strp;
bool enabled;
void (*saved_data_ready)(struct sock *sk);
};

struct sk_psock_work_state {
struct sk_buff *skb;
u32 len;
u32 off;
};

struct sk_psock {
struct sock *sk;
struct sock *sk_redir;
u32 apply_bytes;
u32 cork_bytes;
u32 eval;
struct sk_msg *cork;
struct sk_psock_progs progs;
struct sk_psock_parser parser;
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
unsigned long state;
struct list_head link;
spinlock_t link_lock;
refcount_t refcnt;
void (*saved_unhash)(struct sock *sk);
void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk);
struct proto *sk_proto;
struct sk_psock_work_state work_state;
struct work_struct work;
union {
struct rcu_head rcu;
struct work_struct gc;
};
};

int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
int elem_first_coalesce);
void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len);
int sk_msg_free(struct sock *sk, struct sk_msg *msg);
int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg);
void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes);
void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
u32 bytes);

void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes);

int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);

static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
WARN_ON(i == msg->sg.end && bytes);
}

static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes)
{
if (psock->apply_bytes) {
if (psock->apply_bytes < bytes)
psock->apply_bytes = 0;
else
psock->apply_bytes -= bytes;
}
}

#define sk_msg_iter_var_prev(var) \
do { \
if (var == 0) \
var = MAX_MSG_FRAGS - 1; \
else \
var--; \
} while (0)

#define sk_msg_iter_var_next(var) \
do { \
var++; \
if (var == MAX_MSG_FRAGS) \
var = 0; \
} while (0)

#define sk_msg_iter_prev(msg, which) \
sk_msg_iter_var_prev(msg->sg.which)

#define sk_msg_iter_next(msg, which) \
sk_msg_iter_var_next(msg->sg.which)

static inline void sk_msg_clear_meta(struct sk_msg *msg)
{
memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy));
}

static inline void sk_msg_init(struct sk_msg *msg)
{
memset(msg, 0, sizeof(*msg));
sg_init_marker(msg->sg.data, ARRAY_SIZE(msg->sg.data));
}

static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
int which, u32 size)
{
dst->sg.data[which] = src->sg.data[which];
dst->sg.data[which].length = size;
src->sg.data[which].length -= size;
src->sg.data[which].offset += size;
}

static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
{
return msg->sg.end >= msg->sg.start ?
msg->sg.end - msg->sg.start :
msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start);
}

static inline bool sk_msg_full(const struct sk_msg *msg)
{
return (msg->sg.end == msg->sg.start) && msg->sg.size;
}

static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
{
return &msg->sg.data[which];
}

static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
{
return sg_page(sk_msg_elem(msg, which));
}

static inline bool sk_msg_to_ingress(const struct sk_msg *msg)
{
return msg->flags & BPF_F_INGRESS;
}

static inline void sk_msg_compute_data_pointers(struct sk_msg *msg)
{
struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start);

if (msg->sg.copy[msg->sg.start]) {
msg->data = NULL;
msg->data_end = NULL;
} else {
msg->data = sg_virt(sge);
msg->data_end = msg->data + sge->length;
}
}

static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
u32 len, u32 offset)
{
struct scatterlist *sge;

get_page(page);
sge = sk_msg_elem(msg, msg->sg.end);
sg_set_page(sge, page, len, offset);
sg_unmark_end(sge);

msg->sg.copy[msg->sg.end] = true;
msg->sg.size += len;
sk_msg_iter_next(msg, end);
}

static inline struct sk_psock *sk_psock(const struct sock *sk)
{
return rcu_dereference_sk_user_data(sk);
}

static inline bool sk_has_psock(struct sock *sk)
{
return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg;
}

static inline void sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
list_add_tail(&msg->list, &psock->ingress_msg);
}

static inline void sk_psock_report_error(struct sk_psock *psock, int err)
{
struct sock *sk = psock->sk;

sk->sk_err = err;
sk->sk_error_report(sk);
}

struct sk_psock *sk_psock_init(struct sock *sk, int node);

int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);

int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg);

static inline struct sk_psock_link *sk_psock_init_link(void)
{
return kzalloc(sizeof(struct sk_psock_link),
GFP_ATOMIC | __GFP_NOWARN);
}

static inline void sk_psock_free_link(struct sk_psock_link *link)
{
kfree(link);
}

struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
```
|
||||
#if defined(CONFIG_BPF_STREAM_PARSER)
|
||||
void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link);
|
||||
#else
|
||||
static inline void sk_psock_unlink(struct sock *sk,
|
||||
struct sk_psock_link *link)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
|
||||
|
||||
static inline void sk_psock_cork_free(struct sk_psock *psock)
|
||||
{
|
||||
if (psock->cork) {
|
||||
sk_msg_free(psock->sk, psock->cork);
|
||||
kfree(psock->cork);
|
||||
psock->cork = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void sk_psock_update_proto(struct sock *sk,
|
||||
struct sk_psock *psock,
|
||||
struct proto *ops)
|
||||
{
|
||||
psock->saved_unhash = sk->sk_prot->unhash;
|
||||
psock->saved_close = sk->sk_prot->close;
|
||||
psock->saved_write_space = sk->sk_write_space;
|
||||
|
||||
psock->sk_proto = sk->sk_prot;
|
||||
sk->sk_prot = ops;
|
||||
}
|
||||
|
||||
static inline void sk_psock_restore_proto(struct sock *sk,
|
||||
struct sk_psock *psock)
|
||||
{
|
||||
if (psock->sk_proto) {
|
||||
sk->sk_prot = psock->sk_proto;
|
||||
psock->sk_proto = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void sk_psock_set_state(struct sk_psock *psock,
|
||||
enum sk_psock_state_bits bit)
|
||||
{
|
||||
set_bit(bit, &psock->state);
|
||||
}
|
||||
|
||||
static inline void sk_psock_clear_state(struct sk_psock *psock,
|
||||
enum sk_psock_state_bits bit)
|
||||
{
|
||||
clear_bit(bit, &psock->state);
|
||||
}
|
||||
|
||||
static inline bool sk_psock_test_state(const struct sk_psock *psock,
|
||||
enum sk_psock_state_bits bit)
|
||||
{
|
||||
return test_bit(bit, &psock->state);
|
||||
}
|
||||
|
||||
static inline struct sk_psock *sk_psock_get(struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (psock && !refcount_inc_not_zero(&psock->refcnt))
|
||||
psock = NULL;
|
||||
rcu_read_unlock();
|
||||
return psock;
|
||||
}
|
||||
|
||||
void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
|
||||
void sk_psock_destroy(struct rcu_head *rcu);
|
||||
void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
|
||||
|
||||
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
if (refcount_dec_and_test(&psock->refcnt))
|
||||
sk_psock_drop(sk, psock);
|
||||
}
|
||||
|
||||
static inline void psock_set_prog(struct bpf_prog **pprog,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
prog = xchg(pprog, prog);
|
||||
if (prog)
|
||||
bpf_prog_put(prog);
|
||||
}
|
||||
|
||||
static inline void psock_progs_drop(struct sk_psock_progs *progs)
|
||||
{
|
||||
psock_set_prog(&progs->msg_parser, NULL);
|
||||
psock_set_prog(&progs->skb_parser, NULL);
|
||||
psock_set_prog(&progs->skb_verdict, NULL);
|
||||
}
|
||||
|
||||
#endif /* _LINUX_SKMSG_H */
|
||||
@@ -42,9 +42,10 @@ typedef struct tracepoint * const tracepoint_ptr_t;
|
||||
#endif
|
||||
|
||||
struct bpf_raw_event_map {
	struct tracepoint *tp;
	void *bpf_func;
	u32 num_args;
	u32 writable_size;
} __aligned(32);
|
||||
|
||||
#endif
|
||||
|
||||
13
include/net/bpf_sk_storage.h
Normal file
@@ -0,0 +1,13 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019 Facebook */
#ifndef _BPF_SK_STORAGE_H
#define _BPF_SK_STORAGE_H

struct sock;

void bpf_sk_storage_free(struct sock *sk);

extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;

#endif /* _BPF_SK_STORAGE_H */
|
||||
@@ -44,6 +44,7 @@ struct net_generic;
|
||||
struct uevent_sock;
|
||||
struct netns_ipvs;
|
||||
|
||||
struct bpf_prog;
|
||||
|
||||
#define NETDEV_HASHBITS 8
|
||||
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
|
||||
@@ -144,6 +145,8 @@ struct net {
|
||||
#endif
|
||||
struct net_generic __rcu *gen;
|
||||
|
||||
struct bpf_prog __rcu *flow_dissector_prog;
|
||||
|
||||
/* Note : following structs are cache line aligned */
|
||||
#ifdef CONFIG_XFRM
|
||||
struct netns_xfrm xfrm;
|
||||
|
||||
@@ -19,6 +19,7 @@ struct Qdisc_ops;
|
||||
struct qdisc_walker;
|
||||
struct tcf_walker;
|
||||
struct module;
|
||||
struct bpf_flow_keys;
|
||||
|
||||
typedef int tc_setup_cb_t(enum tc_setup_type type,
|
||||
void *type_data, void *cb_priv);
|
||||
@@ -327,9 +328,14 @@ struct tcf_proto {
|
||||
};
|
||||
|
||||
struct qdisc_skb_cb {
-	unsigned int pkt_len;
-	u16 slave_dev_queue_mapping;
-	u16 tc_classid;
+	union {
+		struct {
+			unsigned int pkt_len;
+			u16 slave_dev_queue_mapping;
+			u16 tc_classid;
+		};
+		struct bpf_flow_keys *flow_keys;
+	};
#define QDISC_CB_PRIV_LEN 20
	unsigned char data[QDISC_CB_PRIV_LEN];
};
|
||||
|
||||
@@ -237,6 +237,8 @@ struct sock_common {
|
||||
/* public: */
|
||||
};
|
||||
|
||||
struct bpf_sk_storage;
|
||||
|
||||
/**
|
||||
* struct sock - network layer representation of sockets
|
||||
* @__sk_common: shared layout with inet_timewait_sock
|
||||
@@ -515,6 +517,9 @@ struct sock {
|
||||
#endif
|
||||
void (*sk_destruct)(struct sock *sk);
|
||||
struct sock_reuseport __rcu *sk_reuseport_cb;
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
struct bpf_sk_storage __rcu *sk_bpf_storage;
|
||||
#endif
|
||||
struct rcu_head sk_rcu;
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_SPINLOCK) || IS_ENABLED(CONFIG_DEBUG_LOCK_ALLOC)
|
||||
|
||||
@@ -903,6 +903,21 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
|
||||
TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
|
||||
}
|
||||
|
||||
static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
|
||||
{
|
||||
return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
|
||||
}
|
||||
|
||||
static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
|
||||
{
|
||||
return TCP_SKB_CB(skb)->bpf.sk_redir;
|
||||
}
|
||||
|
||||
static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
|
||||
{
|
||||
TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
/* This is the variant of inet6_iif() that must be used by TCP,
|
||||
* as TCP moves IP6CB into a different location in skb->cb[]
|
||||
@@ -2164,6 +2179,21 @@ void tcp_cleanup_ulp(struct sock *sk);
|
||||
__MODULE_INFO(alias, alias_userspace, name); \
|
||||
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
|
||||
|
||||
struct sk_msg;
|
||||
struct sk_psock;
|
||||
|
||||
int tcp_bpf_init(struct sock *sk);
|
||||
void tcp_bpf_reinit(struct sock *sk);
|
||||
|
||||
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
|
||||
int flags);
|
||||
|
||||
int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
||||
int nonblock, int flags, int *addr_len);
|
||||
|
||||
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
|
||||
struct msghdr *msg, int len);
|
||||
|
||||
/* Call BPF_SOCK_OPS program that returns an int. If the return value
|
||||
* is < 0, then the BPF op failed (for example if the loaded BPF
|
||||
* program does not support the chosen operation or there is no BPF
|
||||
|
||||
@@ -1107,15 +1107,6 @@ struct sec_path {
|
||||
struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH];
|
||||
};
|
||||
|
||||
static inline int secpath_exists(struct sk_buff *skb)
|
||||
{
|
||||
#ifdef CONFIG_XFRM
|
||||
return skb->sp != NULL;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct sec_path *
|
||||
secpath_get(struct sec_path *sp)
|
||||
{
|
||||
|
||||
@@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \
|
||||
* to make sure that if the tracepoint handling changes, the
|
||||
* bpf probe will fail to compile unless it too is updated.
|
||||
*/
|
||||
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, call, proto, args) \
+#define __DEFINE_EVENT(template, call, proto, args, size) \
|
||||
static inline void bpf_test_probe_##call(void) \
|
||||
{ \
|
||||
check_trace_callback_type_##call(__bpf_trace_##template); \
|
||||
@@ -81,12 +80,34 @@ __bpf_trace_tp_map_##call = { \
|
||||
.tp = &__tracepoint_##call, \
|
||||
.bpf_func = (void *)__bpf_trace_##template, \
|
||||
.num_args = COUNT_ARGS(args), \
|
||||
.writable_size = size, \
|
||||
};
|
||||
|
||||
#define FIRST(x, ...) x
|
||||
#undef DEFINE_EVENT_WRITABLE
|
||||
#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \
|
||||
static inline void bpf_test_buffer_##call(void) \
|
||||
{ \
|
||||
/* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
|
||||
* BUILD_BUG_ON_ZERO() uses a different mechanism that is not \
|
||||
* dead-code-eliminated. \
|
||||
*/ \
|
||||
FIRST(proto); \
|
||||
(void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \
|
||||
} \
|
||||
__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
|
||||
|
||||
#undef DEFINE_EVENT
|
||||
#define DEFINE_EVENT(template, call, proto, args) \
|
||||
__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
|
||||
|
||||
#undef DEFINE_EVENT_PRINT
|
||||
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
|
||||
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
|
||||
|
||||
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
|
||||
|
||||
#undef DEFINE_EVENT_WRITABLE
|
||||
#undef __DEFINE_EVENT
|
||||
#undef FIRST
|
||||
#endif /* CONFIG_BPF_EVENTS */
|
||||
|
||||
@@ -126,7 +126,10 @@ enum bpf_map_type {
|
||||
BPF_MAP_TYPE_XSKMAP,
|
||||
BPF_MAP_TYPE_SOCKHASH,
|
||||
BPF_MAP_TYPE_CGROUP_STORAGE,
|
||||
BPF_MAP_TYPE_SK_STORAGE,
|
||||
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
|
||||
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = 22,
|
||||
BPF_MAP_TYPE_DEVMAP_HASH = 25,
|
||||
};
|
||||
|
||||
enum bpf_prog_type {
|
||||
@@ -152,6 +155,10 @@ enum bpf_prog_type {
|
||||
BPF_PROG_TYPE_LWT_SEG6LOCAL,
|
||||
BPF_PROG_TYPE_LIRC_MODE2,
|
||||
BPF_PROG_TYPE_SK_REUSEPORT,
|
||||
BPF_PROG_TYPE_FLOW_DISSECTOR = 22,
|
||||
BPF_PROG_TYPE_CGROUP_SYSCTL = 23,
|
||||
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE = 24,
|
||||
BPF_PROG_TYPE_CGROUP_SOCKOPT = 25,
|
||||
};
|
||||
|
||||
enum bpf_attach_type {
|
||||
@@ -172,8 +179,12 @@ enum bpf_attach_type {
|
||||
BPF_CGROUP_UDP4_SENDMSG,
|
||||
BPF_CGROUP_UDP6_SENDMSG,
|
||||
BPF_LIRC_MODE2,
|
||||
BPF_FLOW_DISSECTOR = 17,
|
||||
BPF_CGROUP_SYSCTL = 18,
|
||||
BPF_CGROUP_UDP4_RECVMSG = 19,
|
||||
-	BPF_CGROUP_UDP6_RECVMSG,
+	BPF_CGROUP_UDP6_RECVMSG = 20,
|
||||
BPF_CGROUP_GETSOCKOPT = 21,
|
||||
BPF_CGROUP_SETSOCKOPT = 22,
|
||||
__MAX_BPF_ATTACH_TYPE
|
||||
};
|
||||
|
||||
@@ -243,7 +254,19 @@ enum bpf_attach_type {
|
||||
#define BPF_F_ANY_ALIGNMENT (1U << 1)
|
||||
|
||||
-/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
|
||||
* two extensions:
|
||||
*
|
||||
* insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
|
||||
* insn[0].imm: map fd map fd
|
||||
* insn[1].imm: 0 offset into value
|
||||
* insn[0].off: 0 0
|
||||
* insn[1].off: 0 0
|
||||
* ldimm64 rewrite: address of map address of map[0]+offset
|
||||
* verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
|
||||
*/
|
||||
#define BPF_PSEUDO_MAP_FD 1
|
||||
#define BPF_PSEUDO_MAP_VALUE 2
|
||||
|
||||
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
|
||||
* offset to another bpf function
|
||||
@@ -254,6 +277,7 @@ enum bpf_attach_type {
|
||||
#define BPF_ANY 0 /* create new element or update existing */
|
||||
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
|
||||
#define BPF_EXIST 2 /* update existing element */
|
||||
#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
|
||||
|
||||
/* flags for BPF_MAP_CREATE command */
|
||||
#define BPF_F_NO_PREALLOC (1U << 0)
|
||||
@@ -297,6 +321,8 @@ struct bpf_stack_build_id {
|
||||
__u64 ip;
|
||||
};
|
||||
};
|
||||
/* Flags for accessing BPF object from program side. */
|
||||
#define BPF_F_RDONLY_PROG (1U << 7)
|
||||
|
||||
union bpf_attr {
|
||||
struct { /* anonymous struct used by BPF_MAP_CREATE command */
|
||||
@@ -345,6 +371,13 @@ union bpf_attr {
|
||||
* (context accesses, allowed helpers, etc).
|
||||
*/
|
||||
__u32 expected_attach_type;
|
||||
__u32 prog_btf_fd; /* fd pointing to BTF type data */
|
||||
__u32 func_info_rec_size; /* userspace bpf_func_info size */
|
||||
__aligned_u64 func_info; /* func info */
|
||||
__u32 func_info_cnt; /* number of bpf_func_info records */
|
||||
__u32 line_info_rec_size; /* userspace bpf_line_info size */
|
||||
__aligned_u64 line_info; /* line info */
|
||||
__u32 line_info_cnt; /* number of bpf_line_info records */
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_OBJ_* commands */
|
||||
@@ -2162,13 +2195,139 @@ union bpf_attr {
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* u64 bpf_ktime_get_boot_ns(void)
|
||||
* Description
|
||||
* Return the time elapsed since system boot, in nanoseconds.
|
||||
* Does include the time the system was suspended.
|
||||
* See: clock_gettime(CLOCK_BOOTTIME)
|
||||
* Return
|
||||
* Current *ktime*.
|
||||
* int bpf_xdp_adjust_meta(xdp_md, delta)
|
||||
* Adjust the xdp_md.data_meta by delta
|
||||
* @xdp_md: pointer to xdp_md
|
||||
* @delta: A positive or negative integer to be added to xdp_md.data_meta
|
||||
* Return: 0 on success or negative on error
|
||||
*
|
||||
* struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
|
||||
* Description
|
||||
* This helper gets a **struct bpf_sock** pointer such
|
||||
* that all the fields in bpf_sock can be accessed.
|
||||
* Return
|
||||
* A **struct bpf_sock** pointer on success, or NULL in
|
||||
* case of failure.
|
||||
*
|
||||
* struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
|
||||
* Description
|
||||
* Look for TCP socket matching *tuple*, optionally in a child
|
||||
* network namespace *netns*. The return value must be checked,
|
||||
* and if non-NULL, released via **bpf_sk_release**\ ().
|
||||
*
|
||||
* The *ctx* should point to the context of the program, such as
|
||||
* the skb or socket (depending on the hook in use). This is used
|
||||
* to determine the base network namespace for the lookup.
|
||||
*
|
||||
* *tuple_size* must be one of:
|
||||
*
|
||||
* **sizeof**\ (*tuple*\ **->ipv4**)
|
||||
* Look for an IPv4 socket.
|
||||
* **sizeof**\ (*tuple*\ **->ipv6**)
|
||||
* Look for an IPv6 socket.
|
||||
*
|
||||
* If the *netns* is zero, then the socket lookup table in the
|
||||
* netns associated with the *ctx* will be used. For the TC hooks,
|
||||
* this is the netns of the device in the skb. For socket hooks,
* this is the netns of the socket. If *netns* is non-zero, then
|
||||
* it specifies the ID of the netns relative to the netns
|
||||
* associated with the *ctx*.
|
||||
*
|
||||
* All values for *flags* are reserved for future usage, and must
|
||||
* be left at zero.
|
||||
*
|
||||
* This helper is available only if the kernel was compiled with
|
||||
* **CONFIG_NET** configuration option.
|
||||
* Return
|
||||
* Pointer to *struct bpf_sock*, or NULL in case of failure.
|
||||
* For sockets with reuseport option, *struct bpf_sock*
|
||||
* return is from reuse->socks[] using hash of the packet.
|
||||
*
|
||||
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
|
||||
* Description
|
||||
* Look for UDP socket matching *tuple*, optionally in a child
|
||||
* network namespace *netns*. The return value must be checked,
|
||||
* and if non-NULL, released via **bpf_sk_release**\ ().
|
||||
*
|
||||
* The *ctx* should point to the context of the program, such as
|
||||
* the skb or socket (depending on the hook in use). This is used
|
||||
* to determine the base network namespace for the lookup.
|
||||
*
|
||||
* *tuple_size* must be one of:
|
||||
*
|
||||
* **sizeof**\ (*tuple*\ **->ipv4**)
|
||||
* Look for an IPv4 socket.
|
||||
* **sizeof**\ (*tuple*\ **->ipv6**)
|
||||
* Look for an IPv6 socket.
|
||||
*
|
||||
* If the *netns* is zero, then the socket lookup table in the
|
||||
* netns associated with the *ctx* will be used. For the TC hooks,
|
||||
* this is the netns of the device in the skb. For socket hooks,
* this is the netns of the socket. If *netns* is non-zero, then
|
||||
* it specifies the ID of the netns relative to the netns
|
||||
* associated with the *ctx*.
|
||||
*
|
||||
* All values for *flags* are reserved for future usage, and must
|
||||
* be left at zero.
|
||||
*
|
||||
* This helper is available only if the kernel was compiled with
|
||||
* **CONFIG_NET** configuration option.
|
||||
* Return
|
||||
* Pointer to *struct bpf_sock*, or NULL in case of failure.
|
||||
* For sockets with reuseport option, *struct bpf_sock*
|
||||
* return is from reuse->socks[] using hash of the packet.
|
||||
*
|
||||
* int bpf_sk_release(struct bpf_sock *sk)
|
||||
* Description
|
||||
* Release the reference held by *sock*. *sock* must be a non-NULL
|
||||
* pointer that was returned from bpf_sk_lookup_xxx\ ().
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
|
||||
* Description
|
||||
* This helper gets a **struct bpf_tcp_sock** pointer from a
|
||||
* **struct bpf_sock** pointer.
|
||||
*
|
||||
* Return
|
||||
* A **struct bpf_tcp_sock** pointer on success, or NULL in
|
||||
* case of failure.
|
||||
*
|
||||
* void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
|
||||
* Description
|
||||
* Get a bpf-local-storage from a sk.
|
||||
*
|
||||
* Logically, it could be thought of getting the value from
|
||||
* a *map* with *sk* as the **key**. From this
|
||||
* perspective, the usage is not much different from
|
||||
* **bpf_map_lookup_elem(map, &sk)** except this
|
||||
* helper enforces that the key must be a full socket
* (i.e. one returned by **bpf_sk_fullsock**\ ()) and that the
* map must be of type **BPF_MAP_TYPE_SK_STORAGE**.
|
||||
*
|
||||
* Underneath, the value is stored locally at *sk* instead of
|
||||
* the map. The *map* is used as the bpf-local-storage **type**.
|
||||
* The bpf-local-storage **type** (i.e. the *map*) is searched
|
||||
* against all bpf-local-storages residing at sk.
|
||||
*
|
||||
* An optional *flags* (BPF_SK_STORAGE_GET_F_CREATE) can be
|
||||
* used such that a new bpf-local-storage will be
|
||||
* created if one does not exist. *value* can be used
|
||||
* together with BPF_SK_STORAGE_GET_F_CREATE to specify
|
||||
* the initial value of a bpf-local-storage. If *value* is
|
||||
* NULL, the new bpf-local-storage will be zero initialized.
|
||||
* Return
|
||||
* A bpf-local-storage pointer is returned on success.
|
||||
*
|
||||
* **NULL** if not found or there was an error in adding
|
||||
* a new bpf-local-storage.
|
||||
*
|
||||
* int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
|
||||
* Description
|
||||
* Delete a bpf-local-storage from a sk.
|
||||
* Return
|
||||
* 0 on success.
|
||||
*
|
||||
* **-ENOENT** if the bpf-local-storage cannot be found.
|
||||
*/
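Illustrative usage sketch (not taken from this tree): a libbpf-style cgroup_skb
program exercising bpf_sk_fullsock() and bpf_sk_storage_get() as documented
above. The map name, value layout, section name and headers are assumptions.

/* Assumes <linux/bpf.h> and bpf_helpers.h; sk_stg_map is hypothetical. */
struct sk_stg {
	__u64 egress_pkts;
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);	/* required for sk_storage maps */
	__type(key, int);
	__type(value, struct sk_stg);
} sk_stg_map SEC(".maps");

SEC("cgroup_skb/egress")
int count_egress(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	struct sk_stg *stg;

	if (!sk)
		return 1;
	sk = bpf_sk_fullsock(sk);		/* helper documented above */
	if (!sk)
		return 1;
	stg = bpf_sk_storage_get(&sk_stg_map, sk, NULL,
				 BPF_SK_STORAGE_GET_F_CREATE);
	if (stg)
		__sync_fetch_and_add(&stg->egress_pkts, 1);
	return 1;				/* allow the packet */
}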
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
@@ -2352,6 +2511,9 @@ enum bpf_func_id {
|
||||
/* BPF_FUNC_perf_event_output for sk_buff input context. */
|
||||
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
|
||||
|
||||
/* BPF_FUNC_sk_storage_get flags */
|
||||
#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0)
|
||||
|
||||
/* Mode for BPF_FUNC_skb_adjust_room helper. */
|
||||
enum bpf_adj_room_mode {
|
||||
BPF_ADJ_ROOM_NET,
|
||||
@@ -2369,6 +2531,12 @@ enum bpf_lwt_encap_mode {
|
||||
BPF_LWT_ENCAP_SEG6_INLINE
|
||||
};
|
||||
|
||||
#define __bpf_md_ptr(type, name) \
|
||||
union { \
|
||||
type name; \
|
||||
__u64 :64; \
|
||||
} __attribute__((aligned(8)))
|
||||
|
||||
/* user accessible mirror of in-kernel sk_buff.
|
||||
* new fields can only be added to the end of this structure
|
||||
*/
|
||||
@@ -2400,9 +2568,11 @@ struct __sk_buff {
|
||||
__u32 local_ip6[4]; /* Stored in network byte order */
|
||||
__u32 remote_port; /* Stored in network byte order */
|
||||
__u32 local_port; /* stored in host byte order */
|
||||
__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
|
||||
/* ... here. */
|
||||
|
||||
__u32 data_meta;
|
||||
__bpf_md_ptr(struct bpf_sock *, sk);
|
||||
};
|
||||
|
||||
struct bpf_tunnel_key {
|
||||
@@ -2465,6 +2635,62 @@ struct bpf_sock {
|
||||
*/
|
||||
};
|
||||
|
||||
struct bpf_tcp_sock {
|
||||
__u32 snd_cwnd; /* Sending congestion window */
|
||||
__u32 srtt_us; /* smoothed round trip time << 3 in usecs */
|
||||
__u32 rtt_min;
|
||||
__u32 snd_ssthresh; /* Slow start size threshold */
|
||||
__u32 rcv_nxt; /* What we want to receive next */
|
||||
__u32 snd_nxt; /* Next sequence we send */
|
||||
__u32 snd_una; /* First byte we want an ack for */
|
||||
__u32 mss_cache; /* Cached effective mss, not including SACKS */
|
||||
__u32 ecn_flags; /* ECN status bits. */
|
||||
__u32 rate_delivered; /* saved rate sample: packets delivered */
|
||||
__u32 rate_interval_us; /* saved rate sample: time elapsed */
|
||||
__u32 packets_out; /* Packets which are "in flight" */
|
||||
__u32 retrans_out; /* Retransmitted packets out */
|
||||
__u32 total_retrans; /* Total retransmits for entire connection */
|
||||
__u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
|
||||
* total number of segments in.
|
||||
*/
|
||||
__u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
|
||||
* total number of data segments in.
|
||||
*/
|
||||
__u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
|
||||
* The total number of segments sent.
|
||||
*/
|
||||
__u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
|
||||
* total number of data segments sent.
|
||||
*/
|
||||
__u32 lost_out; /* Lost packets */
|
||||
__u32 sacked_out; /* SACK'd packets */
|
||||
__u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
|
||||
* sum(delta(rcv_nxt)), or how many bytes
|
||||
* were acked.
|
||||
*/
|
||||
__u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
|
||||
* sum(delta(snd_una)), or how many bytes
|
||||
* were acked.
|
||||
*/
|
||||
};
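Illustrative sketch (not taken from this tree): reading struct bpf_tcp_sock
fields through the bpf_sk_fullsock()/bpf_tcp_sock() helpers documented earlier.
The section name and the bpf_printk() macro from bpf_helpers.h are assumptions.

SEC("cgroup_skb/ingress")
int dump_cwnd(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	struct bpf_tcp_sock *tp;

	if (!sk)
		return 1;
	sk = bpf_sk_fullsock(sk);
	if (!sk)
		return 1;
	tp = bpf_tcp_sock(sk);		/* NULL unless sk is a full TCP socket */
	if (!tp)
		return 1;
	bpf_printk("cwnd %u srtt %u us", tp->snd_cwnd, tp->srtt_us >> 3);
	return 1;
}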
|
||||
|
||||
struct bpf_sock_tuple {
|
||||
union {
|
||||
struct {
|
||||
__be32 saddr;
|
||||
__be32 daddr;
|
||||
__be16 sport;
|
||||
__be16 dport;
|
||||
} ipv4;
|
||||
struct {
|
||||
__be32 saddr[4];
|
||||
__be32 daddr[4];
|
||||
__be16 sport;
|
||||
__be16 dport;
|
||||
} ipv6;
|
||||
};
|
||||
};
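Illustrative sketch (not taken from this tree): filling a struct bpf_sock_tuple
and calling the bpf_sk_lookup_tcp()/bpf_sk_release() helpers documented earlier
from a tc classifier. The hard-coded tuple, the section name and the usual
libbpf headers (bpf_helpers.h, bpf_endian.h, <linux/pkt_cls.h>) are assumptions.

SEC("classifier")
int has_local_listener(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = {};
	struct bpf_sock *sk;

	tuple.ipv4.saddr = bpf_htonl(0x7f000001);	/* 127.0.0.1 */
	tuple.ipv4.daddr = bpf_htonl(0x7f000001);
	tuple.ipv4.sport = bpf_htons(12345);
	tuple.ipv4.dport = bpf_htons(80);

	/* netns == 0: look up in the netns of the skb's device */
	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4), 0, 0);
	if (sk)
		bpf_sk_release(sk);	/* every non-NULL result must be released */
	return TC_ACT_OK;
}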
|
||||
|
||||
#define XDP_PACKET_HEADROOM 256
|
||||
|
||||
/* User return codes for XDP prog type.
|
||||
@@ -2501,8 +2727,8 @@ enum sk_action {
|
||||
* be added to the end of this structure
|
||||
*/
|
||||
struct sk_msg_md {
|
||||
-	void *data;
-	void *data_end;
+	__bpf_md_ptr(void *, data);
+	__bpf_md_ptr(void *, data_end);
|
||||
|
||||
__u32 family;
|
||||
__u32 remote_ip4; /* Stored in network byte order */
|
||||
@@ -2518,8 +2744,9 @@ struct sk_reuseport_md {
|
||||
* Start of directly accessible data. It begins from
|
||||
* the tcp/udp header.
|
||||
*/
|
||||
-	void *data;
-	void *data_end; /* End of directly accessible data */
+	__bpf_md_ptr(void *, data);
+	/* End of directly accessible data */
+	__bpf_md_ptr(void *, data_end);
|
||||
/*
|
||||
* Total length of packet (starting from the tcp/udp header).
|
||||
* Note that the directly accessible bytes (data_end - data)
|
||||
@@ -2561,6 +2788,16 @@ struct bpf_prog_info {
|
||||
__u32 nr_jited_func_lens;
|
||||
__aligned_u64 jited_ksyms;
|
||||
__aligned_u64 jited_func_lens;
|
||||
__u32 btf_id;
|
||||
__u32 func_info_rec_size;
|
||||
__aligned_u64 func_info;
|
||||
__u32 func_info_cnt;
|
||||
__u32 line_info_cnt;
|
||||
__aligned_u64 line_info;
|
||||
__aligned_u64 jited_line_info;
|
||||
__u32 jited_line_info_cnt;
|
||||
__u32 line_info_rec_size;
|
||||
__u32 jited_line_info_rec_size;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_map_info {
|
||||
@@ -2773,6 +3010,29 @@ struct bpf_raw_tracepoint_args {
|
||||
__u64 args[0];
|
||||
};
|
||||
|
||||
struct bpf_flow_keys {
|
||||
__u16 nhoff;
|
||||
__u16 thoff;
|
||||
__u16 addr_proto; /* ETH_P_* of valid addrs */
|
||||
__u8 is_frag;
|
||||
__u8 is_first_frag;
|
||||
__u8 is_encap;
|
||||
__u8 ip_proto;
|
||||
__be16 n_proto;
|
||||
__be16 sport;
|
||||
__be16 dport;
|
||||
union {
|
||||
struct {
|
||||
__be32 ipv4_src;
|
||||
__be32 ipv4_dst;
|
||||
};
|
||||
struct {
|
||||
__u32 ipv6_src[4]; /* in6_addr; network order */
|
||||
__u32 ipv6_dst[4]; /* in6_addr; network order */
|
||||
};
|
||||
};
|
||||
};
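Illustrative sketch (not taken from this tree): the smallest shape of a
BPF_PROG_TYPE_FLOW_DISSECTOR program reaching struct bpf_flow_keys through the
skb->flow_keys pointer added to struct __sk_buff above. A real dissector must
parse the packet from keys->nhoff and fill thoff, ip_proto, addresses and ports
before returning BPF_OK; this stub only shows how the context is wired up.

SEC("flow_dissector")
int dissect_stub(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	if (!keys)
		return BPF_DROP;
	/* keys->nhoff and keys->n_proto are pre-set by the kernel */
	return BPF_DROP;	/* reject: nothing was actually dissected */
}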
|
||||
|
||||
/* DIRECT: Skip the FIB rules and go to FIB table associated with device
|
||||
* OUTPUT: Do lookup from egress perspective; default is ingress
|
||||
*/
|
||||
@@ -2849,4 +3109,38 @@ enum bpf_task_fd_type {
|
||||
BPF_FD_TYPE_URETPROBE, /* filename + offset */
|
||||
};
|
||||
|
||||
struct bpf_sysctl {
|
||||
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
|
||||
* Allows 1,2,4-byte read, but no write.
|
||||
*/
|
||||
};
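Illustrative sketch (not taken from this tree): a BPF_PROG_TYPE_CGROUP_SYSCTL
program attached at BPF_CGROUP_SYSCTL that uses only the ctx->write field
defined above; the section name is a libbpf convention assumed here.

SEC("cgroup/sysctl")
int sysctl_read_only(struct bpf_sysctl *ctx)
{
	if (ctx->write)
		return 0;	/* deny: the access fails with -EPERM */
	return 1;		/* allow reads */
}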
|
||||
|
||||
struct bpf_func_info {
|
||||
__u32 insn_offset;
|
||||
__u32 type_id;
|
||||
};
|
||||
|
||||
#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
|
||||
#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
|
||||
struct bpf_line_info {
|
||||
__u32 insn_off;
|
||||
__u32 file_name_off;
|
||||
__u32 line_off;
|
||||
__u32 line_col;
|
||||
};
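Illustrative sketch (not taken from this tree): unpacking one struct
bpf_line_info record, e.g. as returned through bpf_prog_info.line_info, with
the BPF_LINE_INFO_* macros above. User-space code; printf() and the BTF string
table pointer are assumptions of the example.

static void print_line_info(const struct bpf_line_info *li, const char *btf_strs)
{
	printf("%s:%u:%u (insn %u)\n",
	       btf_strs + li->file_name_off,
	       BPF_LINE_INFO_LINE_NUM(li->line_col),
	       BPF_LINE_INFO_LINE_COL(li->line_col),
	       li->insn_off);
}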
|
||||
|
||||
struct bpf_spin_lock {
|
||||
__u32 val;
|
||||
};
|
||||
|
||||
struct bpf_sockopt {
|
||||
__bpf_md_ptr(struct bpf_sock *, sk);
|
||||
__bpf_md_ptr(void *, optval);
|
||||
__bpf_md_ptr(void *, optval_end);
|
||||
__s32 level;
|
||||
__s32 optname;
|
||||
__s32 optlen;
|
||||
__s32 retval;
|
||||
};
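Illustrative sketch (not taken from this tree): a cgroup setsockopt program on
the struct bpf_sockopt context above. SOL_IP/IP_TOS and the section name are
assumptions; the return-value semantics mirror
__cgroup_bpf_run_filter_setsockopt() later in this diff.

SEC("cgroup/setsockopt")
int setsockopt_filter(struct bpf_sockopt *ctx)
{
	/* 0 => the setsockopt() call fails with -EPERM.
	 * 1 => the kernel handler runs on (possibly rewritten) ctx->optval;
	 *      setting ctx->optlen = -1 first would skip the kernel handler.
	 */
	if (ctx->level == SOL_IP && ctx->optname == IP_TOS)
		return 0;	/* example policy: forbid changing IP_TOS */
	return 1;
}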
|
||||
|
||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||
|
||||
@@ -34,13 +34,16 @@ struct btf_type {
|
||||
* bits 0-15: vlen (e.g. # of struct's members)
|
||||
* bits 16-23: unused
|
||||
* bits 24-27: kind (e.g. int, ptr, array...etc)
|
||||
- * bits 28-31: unused
+ * bits 28-30: unused
+ * bit 31: kind_flag, currently used by
+ *         struct, union and fwd
|
||||
*/
|
||||
__u32 info;
|
||||
/* "size" is used by INT, ENUM, STRUCT and UNION.
|
||||
* "size" tells the size of the type it is describing.
|
||||
*
|
||||
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
|
||||
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
|
||||
* FUNC and FUNC_PROTO.
|
||||
* "type" is a type_id referring to another type.
|
||||
*/
|
||||
union {
|
||||
@@ -51,6 +54,7 @@ struct btf_type {
|
||||
|
||||
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
|
||||
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
|
||||
#define BTF_INFO_KFLAG(info) ((info) >> 31)
|
||||
|
||||
#define BTF_KIND_UNKN 0 /* Unknown */
|
||||
#define BTF_KIND_INT 1 /* Integer */
|
||||
@@ -64,8 +68,10 @@ struct btf_type {
|
||||
#define BTF_KIND_VOLATILE 9 /* Volatile */
|
||||
#define BTF_KIND_CONST 10 /* Const */
|
||||
#define BTF_KIND_RESTRICT 11 /* Restrict */
|
||||
-#define BTF_KIND_MAX 11
-#define NR_BTF_KINDS 12
+#define BTF_KIND_FUNC 12 /* Function */
+#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
+#define BTF_KIND_MAX 13
+#define NR_BTF_KINDS 14
|
||||
|
||||
/* For some specific BTF_KIND, "struct btf_type" is immediately
|
||||
* followed by extra data.
|
||||
@@ -107,7 +113,28 @@ struct btf_array {
|
||||
struct btf_member {
|
||||
__u32 name_off;
|
||||
__u32 type;
|
||||
-	__u32 offset; /* offset in bits */
+	/* If the type info kind_flag is set, the btf_member offset
+	 * contains both member bitfield size and bit offset. The
+	 * bitfield size is set for bitfield members. If the type
+	 * info kind_flag is not set, the offset contains only bit
+	 * offset.
+	 */
+	__u32 offset;
|
||||
};
|
||||
|
||||
/* If the struct/union type info kind_flag is set, the
|
||||
* following two macros are used to access bitfield_size
|
||||
* and bit_offset from btf_member.offset.
|
||||
*/
|
||||
#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24)
|
||||
#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff)
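Illustrative sketch (not taken from this tree): how a BTF consumer can decode a
member's bit offset and bitfield size while honouring the kind_flag convention
described above.

static void btf_member_layout(const struct btf_type *t,
			      const struct btf_member *m,
			      __u32 *bit_off, __u32 *bitfield_size)
{
	if (BTF_INFO_KFLAG(t->info)) {
		*bit_off = BTF_MEMBER_BIT_OFFSET(m->offset);
		*bitfield_size = BTF_MEMBER_BITFIELD_SIZE(m->offset);
	} else {
		*bit_off = m->offset;		/* plain bit offset */
		*bitfield_size = 0;		/* not a bitfield encoding */
	}
}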
|
||||
|
||||
/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param".
|
||||
* The exact number of btf_param is stored in the vlen (of the
|
||||
* info in "struct btf_type").
|
||||
*/
|
||||
struct btf_param {
|
||||
__u32 name_off;
|
||||
__u32 type;
|
||||
};
|
||||
#endif /* _UAPI__LINUX_BTF_H__ */
|
||||
|
||||
@@ -242,6 +242,9 @@ config QUEUED_SPINLOCKS
|
||||
def_bool y if ARCH_USE_QUEUED_SPINLOCKS
|
||||
depends on SMP
|
||||
|
||||
config BPF_ARCH_SPINLOCK
|
||||
bool
|
||||
|
||||
config ARCH_USE_QUEUED_RWLOCKS
|
||||
bool
|
||||
|
||||
|
||||
@@ -14,11 +14,6 @@ ifeq ($(CONFIG_XDP_SOCKETS),y)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
|
||||
endif
|
||||
obj-$(CONFIG_BPF_SYSCALL) += offload.o
|
||||
ifeq ($(CONFIG_STREAM_PARSER),y)
|
||||
ifeq ($(CONFIG_INET),y)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += sockmap.o
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifeq ($(CONFIG_PERF_EVENTS),y)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
|
||||
|
||||
@@ -163,6 +163,39 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
|
||||
return array->value + array->elem_size * (index & array->index_mask);
|
||||
}
|
||||
|
||||
static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
|
||||
u32 off)
|
||||
{
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
|
||||
if (map->max_entries != 1)
|
||||
return -ENOTSUPP;
|
||||
|
||||
if (off >= map->value_size)
|
||||
return -EINVAL;
|
||||
|
||||
*imm = (unsigned long)array->value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
|
||||
u32 *off)
|
||||
{
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
u64 base = (unsigned long)array->value;
|
||||
u64 range = array->elem_size;
|
||||
|
||||
if (map->max_entries != 1)
|
||||
return -ENOTSUPP;
|
||||
|
||||
if (imm < base || imm >= base + range)
|
||||
return -ENOENT;
|
||||
|
||||
*off = imm - base;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
|
||||
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
|
||||
{
|
||||
@@ -256,8 +289,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
{
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
u32 index = *(u32 *)key;
|
||||
char *val;
|
||||
|
||||
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
|
||||
/* unknown flags */
|
||||
return -EINVAL;
|
||||
|
||||
@@ -265,17 +299,26 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
/* all elements were pre-allocated, cannot insert a new one */
|
||||
return -E2BIG;
|
||||
|
||||
-	if (unlikely(map_flags == BPF_NOEXIST))
+	if (unlikely(map_flags & BPF_NOEXIST))
|
||||
/* all elements already exist */
|
||||
return -EEXIST;
|
||||
|
||||
-	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+	if (unlikely((map_flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(map)))
+		return -EINVAL;
+
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
-	else
-		memcpy(array->value +
-		       array->elem_size * (index & array->index_mask),
-		       value, map->value_size);
+	} else {
+		val = array->value +
+		      array->elem_size * (index & array->index_mask);
+		if (map_flags & BPF_F_LOCK)
+			copy_map_value_locked(map, val, value, false);
+		else
+			copy_map_value(map, val, value);
+	}
|
||||
|
||||
return 0;
|
||||
}
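Illustrative sketch (not taken from this tree): what the BPF_F_LOCK path above
looks like from user space, assuming libbpf's bpf_map_update_elem() wrapper and
a map value that embeds a struct bpf_spin_lock (the value layout is an
assumption of the example).

struct map_value {
	struct bpf_spin_lock lock;	/* taken by copy_map_value_locked() */
	__u64 counter;
};

static int update_locked(int map_fd, __u32 key, const struct map_value *val)
{
	return bpf_map_update_elem(map_fd, &key, val, BPF_F_LOCK);
}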
|
||||
|
||||
@@ -362,6 +405,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
|
||||
}
|
||||
|
||||
static int array_map_check_btf(const struct bpf_map *map,
|
||||
const struct btf *btf,
|
||||
const struct btf_type *key_type,
|
||||
const struct btf_type *value_type)
|
||||
{
|
||||
@@ -389,6 +433,8 @@ const struct bpf_map_ops array_map_ops = {
|
||||
.map_update_elem = array_map_update_elem,
|
||||
.map_delete_elem = array_map_delete_elem,
|
||||
.map_gen_lookup = array_map_gen_lookup,
|
||||
.map_direct_value_addr = array_map_direct_value_addr,
|
||||
.map_direct_value_meta = array_map_direct_value_meta,
|
||||
.map_seq_show_elem = array_map_seq_show_elem,
|
||||
.map_check_btf = array_map_check_btf,
|
||||
};
|
||||
|
||||
778
kernel/bpf/btf.c
File diff suppressed because it is too large
@@ -11,10 +11,13 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf-cgroup.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/bpf_sk_storage.h>
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
|
||||
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
|
||||
@@ -25,6 +28,7 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
|
||||
*/
|
||||
void cgroup_bpf_put(struct cgroup *cgrp)
|
||||
{
|
||||
enum bpf_cgroup_storage_type stype;
|
||||
unsigned int type;
|
||||
|
||||
for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
|
||||
@@ -34,8 +38,10 @@ void cgroup_bpf_put(struct cgroup *cgrp)
|
||||
list_for_each_entry_safe(pl, tmp, progs, node) {
|
||||
list_del(&pl->node);
|
||||
bpf_prog_put(pl->prog);
|
||||
-			bpf_cgroup_storage_unlink(pl->storage);
-			bpf_cgroup_storage_free(pl->storage);
+			for_each_cgroup_storage_type(stype) {
+				bpf_cgroup_storage_unlink(pl->storage[stype]);
+				bpf_cgroup_storage_free(pl->storage[stype]);
+			}
|
||||
kfree(pl);
|
||||
static_branch_dec(&cgroup_bpf_enabled_key);
|
||||
}
|
||||
@@ -97,6 +103,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
|
||||
enum bpf_attach_type type,
|
||||
struct bpf_prog_array __rcu **array)
|
||||
{
|
||||
enum bpf_cgroup_storage_type stype;
|
||||
struct bpf_prog_array *progs;
|
||||
struct bpf_prog_list *pl;
|
||||
struct cgroup *p = cgrp;
|
||||
@@ -125,7 +132,9 @@ static int compute_effective_progs(struct cgroup *cgrp,
|
||||
continue;
|
||||
|
||||
progs->items[cnt].prog = pl->prog;
|
||||
-			progs->items[cnt].cgroup_storage = pl->storage;
+			for_each_cgroup_storage_type(stype)
+				progs->items[cnt].cgroup_storage[stype] =
+					pl->storage[stype];
|
||||
cnt++;
|
||||
}
|
||||
} while ((p = cgroup_parent(p)));
|
||||
@@ -232,7 +241,10 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
{
|
||||
struct list_head *progs = &cgrp->bpf.progs[type];
|
||||
struct bpf_prog *old_prog = NULL;
|
||||
-	struct bpf_cgroup_storage *storage, *old_storage = NULL;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
+		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
|
||||
enum bpf_cgroup_storage_type stype;
|
||||
struct cgroup_subsys_state *css;
|
||||
struct bpf_prog_list *pl;
|
||||
bool pl_was_allocated;
|
||||
int err;
|
||||
@@ -254,34 +266,44 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
|
||||
return -E2BIG;
|
||||
|
||||
-	storage = bpf_cgroup_storage_alloc(prog);
-	if (IS_ERR(storage))
-		return -ENOMEM;
+	for_each_cgroup_storage_type(stype) {
+		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+		if (IS_ERR(storage[stype])) {
+			storage[stype] = NULL;
+			for_each_cgroup_storage_type(stype)
+				bpf_cgroup_storage_free(storage[stype]);
+			return -ENOMEM;
+		}
+	}
|
||||
|
||||
if (flags & BPF_F_ALLOW_MULTI) {
|
||||
list_for_each_entry(pl, progs, node) {
|
||||
if (pl->prog == prog) {
|
||||
/* disallow attaching the same prog twice */
|
||||
-				bpf_cgroup_storage_free(storage);
+				for_each_cgroup_storage_type(stype)
+					bpf_cgroup_storage_free(storage[stype]);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
|
||||
if (!pl) {
|
||||
-			bpf_cgroup_storage_free(storage);
+			for_each_cgroup_storage_type(stype)
+				bpf_cgroup_storage_free(storage[stype]);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pl_was_allocated = true;
|
||||
pl->prog = prog;
|
||||
-		pl->storage = storage;
+		for_each_cgroup_storage_type(stype)
+			pl->storage[stype] = storage[stype];
|
||||
list_add_tail(&pl->node, progs);
|
||||
} else {
|
||||
if (list_empty(progs)) {
|
||||
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
|
||||
if (!pl) {
|
||||
-				bpf_cgroup_storage_free(storage);
+				for_each_cgroup_storage_type(stype)
+					bpf_cgroup_storage_free(storage[stype]);
|
||||
return -ENOMEM;
|
||||
}
|
||||
pl_was_allocated = true;
|
||||
@@ -289,12 +311,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
} else {
|
||||
pl = list_first_entry(progs, typeof(*pl), node);
|
||||
old_prog = pl->prog;
|
||||
-			old_storage = pl->storage;
-			bpf_cgroup_storage_unlink(old_storage);
+			for_each_cgroup_storage_type(stype) {
+				old_storage[stype] = pl->storage[stype];
+				bpf_cgroup_storage_unlink(old_storage[stype]);
+			}
|
||||
pl_was_allocated = false;
|
||||
}
|
||||
pl->prog = prog;
|
||||
-		pl->storage = storage;
+		for_each_cgroup_storage_type(stype)
+			pl->storage[stype] = storage[stype];
|
||||
}
|
||||
|
||||
cgrp->bpf.flags[type] = flags;
|
||||
@@ -304,26 +329,42 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
goto cleanup;
|
||||
|
||||
static_branch_inc(&cgroup_bpf_enabled_key);
|
||||
-	if (old_storage)
-		bpf_cgroup_storage_free(old_storage);
+	for_each_cgroup_storage_type(stype) {
+		if (!old_storage[stype])
+			continue;
+		bpf_cgroup_storage_free(old_storage[stype]);
+	}
|
||||
if (old_prog) {
|
||||
bpf_prog_put(old_prog);
|
||||
static_branch_dec(&cgroup_bpf_enabled_key);
|
||||
}
|
||||
-	bpf_cgroup_storage_link(storage, cgrp, type);
+	for_each_cgroup_storage_type(stype)
+		bpf_cgroup_storage_link(storage[stype], cgrp, type);
|
||||
return 0;
|
||||
|
||||
cleanup:
|
||||
/* and cleanup the prog list */
|
||||
pl->prog = old_prog;
|
||||
bpf_cgroup_storage_free(pl->storage);
|
||||
pl->storage = old_storage;
|
||||
bpf_cgroup_storage_link(old_storage, cgrp, type);
|
||||
if (pl_was_allocated) {
|
||||
list_del(&pl->node);
|
||||
kfree(pl);
|
||||
/* oom while computing effective. Free all computed effective arrays
|
||||
* since they were not activated
|
||||
*/
|
||||
css_for_each_descendant_pre(css, &cgrp->self) {
|
||||
struct cgroup *desc = container_of(css, struct cgroup, self);
|
||||
|
||||
bpf_prog_array_free(desc->bpf.inactive);
|
||||
desc->bpf.inactive = NULL;
|
||||
}
|
||||
return err;
|
||||
|
||||
/* and cleanup the prog list */
|
||||
pl->prog = old_prog;
|
||||
for_each_cgroup_storage_type(stype) {
|
||||
bpf_cgroup_storage_free(pl->storage[stype]);
|
||||
pl->storage[stype] = old_storage[stype];
|
||||
bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
|
||||
}
|
||||
if (pl_was_allocated) {
|
||||
list_del(&pl->node);
|
||||
kfree(pl);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -339,6 +380,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
enum bpf_attach_type type, u32 unused_flags)
|
||||
{
|
||||
struct list_head *progs = &cgrp->bpf.progs[type];
|
||||
enum bpf_cgroup_storage_type stype;
|
||||
u32 flags = cgrp->bpf.flags[type];
|
||||
struct bpf_prog *old_prog = NULL;
|
||||
struct bpf_prog_list *pl;
|
||||
@@ -385,8 +427,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
|
||||
|
||||
/* now can actually delete it from this cgroup list */
|
||||
list_del(&pl->node);
|
||||
-	bpf_cgroup_storage_unlink(pl->storage);
-	bpf_cgroup_storage_free(pl->storage);
+	for_each_cgroup_storage_type(stype) {
+		bpf_cgroup_storage_unlink(pl->storage[stype]);
+		bpf_cgroup_storage_free(pl->storage[stype]);
+	}
|
||||
kfree(pl);
|
||||
if (list_empty(progs))
|
||||
/* last program was detached, reset flags to zero */
|
||||
@@ -664,7 +708,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
|
||||
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
-cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
switch (func_id) {
|
||||
case BPF_FUNC_map_lookup_elem:
|
||||
@@ -677,6 +721,8 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_get_current_uid_gid_proto;
|
||||
case BPF_FUNC_get_local_storage:
|
||||
return &bpf_get_local_storage_proto;
|
||||
case BPF_FUNC_get_current_cgroup_id:
|
||||
return &bpf_get_current_cgroup_id_proto;
|
||||
case BPF_FUNC_trace_printk:
|
||||
if (capable(CAP_SYS_ADMIN))
|
||||
return bpf_get_trace_printk_proto();
|
||||
@@ -685,6 +731,12 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
}
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
return cgroup_base_func_proto(func_id, prog);
|
||||
}
|
||||
|
||||
static bool cgroup_dev_is_valid_access(int off, int size,
|
||||
enum bpf_access_type type,
|
||||
const struct bpf_prog *prog,
|
||||
@@ -722,3 +774,415 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
|
||||
.get_func_proto = cgroup_dev_func_proto,
|
||||
.is_valid_access = cgroup_dev_is_valid_access,
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
|
||||
*
|
||||
* @head: sysctl table header
|
||||
* @table: sysctl table
|
||||
* @write: sysctl is being read (= 0) or written (= 1)
|
||||
* @type: type of program to be executed
|
||||
*
|
||||
* Program is run when sysctl is being accessed, either read or written, and
|
||||
* can allow or deny such access.
|
||||
*
|
||||
* This function will return %-EPERM if an attached program is found and
|
||||
* returned value != 1 during execution. In all other cases 0 is returned.
|
||||
*/
|
||||
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
|
||||
struct ctl_table *table, int write,
|
||||
enum bpf_attach_type type)
|
||||
{
|
||||
struct bpf_sysctl_kern ctx = {
|
||||
.head = head,
|
||||
.table = table,
|
||||
.write = write,
|
||||
};
|
||||
struct cgroup *cgrp;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
cgrp = task_dfl_cgroup(current);
|
||||
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret == 1 ? 0 : -EPERM;
|
||||
}
|
||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
|
||||
|
||||
static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
|
||||
enum bpf_attach_type attach_type)
|
||||
{
|
||||
struct bpf_prog_array *prog_array;
|
||||
bool empty;
|
||||
rcu_read_lock();
|
||||
prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);
|
||||
empty = bpf_prog_array_is_empty(prog_array);
|
||||
rcu_read_unlock();
|
||||
return empty;
|
||||
}
|
||||
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
|
||||
{
|
||||
if (unlikely(max_optlen < 0))
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely(max_optlen > PAGE_SIZE)) {
|
||||
/* We don't expose optvals that are greater than PAGE_SIZE
|
||||
* to the BPF program.
|
||||
*/
|
||||
max_optlen = PAGE_SIZE;
|
||||
}
|
||||
|
||||
ctx->optval = kzalloc(max_optlen, GFP_USER);
|
||||
if (!ctx->optval)
|
||||
return -ENOMEM;
|
||||
ctx->optval_end = ctx->optval + max_optlen;
|
||||
return max_optlen;
|
||||
}
|
||||
static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
|
||||
{
|
||||
kfree(ctx->optval);
|
||||
}
|
||||
int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
|
||||
int *optname, char __user *optval,
|
||||
int *optlen, char **kernel_optval)
|
||||
{
|
||||
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
|
||||
struct bpf_sockopt_kern ctx = {
|
||||
.sk = sk,
|
||||
.level = *level,
|
||||
.optname = *optname,
|
||||
};
|
||||
int ret, max_optlen;
|
||||
|
||||
/* Opportunistic check to see whether we have any BPF program
|
||||
* attached to the hook so we don't waste time allocating
|
||||
* memory and locking the socket.
|
||||
*/
|
||||
if (!cgroup_bpf_enabled ||
|
||||
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
|
||||
return 0;
|
||||
|
||||
/* Allocate a bit more than the initial user buffer for
|
||||
* BPF program. The canonical use case is overriding
|
||||
* TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
|
||||
*/
|
||||
max_optlen = max_t(int, 16, *optlen);
|
||||
|
||||
max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
|
||||
if (max_optlen < 0)
|
||||
return max_optlen;
|
||||
|
||||
ctx.optlen = *optlen;
|
||||
|
||||
if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
lock_sock(sk);
|
||||
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
|
||||
&ctx, BPF_PROG_RUN);
|
||||
release_sock(sk);
|
||||
if (!ret) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
if (ctx.optlen == -1) {
|
||||
/* optlen set to -1, bypass kernel */
|
||||
ret = 1;
|
||||
} else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
|
||||
/* optlen is out of bounds */
|
||||
ret = -EFAULT;
|
||||
} else {
|
||||
/* optlen within bounds, run kernel handler */
|
||||
ret = 0;
|
||||
/* export any potential modifications */
|
||||
*level = ctx.level;
|
||||
*optname = ctx.optname;
|
||||
|
||||
/* optlen == 0 from BPF indicates that we should
|
||||
* use original userspace data.
|
||||
*/
|
||||
if (ctx.optlen != 0) {
|
||||
*optlen = ctx.optlen;
|
||||
*kernel_optval = ctx.optval;
|
||||
}
|
||||
}
|
||||
out:
|
||||
if (ret)
|
||||
sockopt_free_buf(&ctx);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt);
|
||||
int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
|
||||
int optname, char __user *optval,
|
||||
int __user *optlen, int max_optlen,
|
||||
int retval)
|
||||
{
|
||||
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
|
||||
struct bpf_sockopt_kern ctx = {
|
||||
.sk = sk,
|
||||
.level = level,
|
||||
.optname = optname,
|
||||
.retval = retval,
|
||||
};
|
||||
int ret;
|
||||
/* Opportunistic check to see whether we have any BPF program
|
||||
* attached to the hook so we don't waste time allocating
|
||||
* memory and locking the socket.
|
||||
*/
|
||||
if (!cgroup_bpf_enabled ||
|
||||
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
|
||||
return retval;
|
||||
|
||||
ctx.optlen = max_optlen;
|
||||
|
||||
max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
|
||||
if (max_optlen < 0)
|
||||
return max_optlen;
|
||||
|
||||
if (!retval) {
|
||||
/* If kernel getsockopt finished successfully,
|
||||
* copy whatever was returned to the user back
|
||||
* into our temporary buffer. Set optlen to the
|
||||
* one that kernel returned as well to let
|
||||
* BPF programs inspect the value.
|
||||
*/
|
||||
if (get_user(ctx.optlen, optlen)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(ctx.optval, optval,
|
||||
min(ctx.optlen, max_optlen)) != 0) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
lock_sock(sk);
|
||||
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
|
||||
&ctx, BPF_PROG_RUN);
|
||||
release_sock(sk);
|
||||
if (!ret) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
if (ctx.optlen > max_optlen) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
/* BPF programs only allowed to set retval to 0, not some
|
||||
* arbitrary value.
|
||||
*/
|
||||
if (ctx.retval != 0 && ctx.retval != retval) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ctx.optlen != 0) {
|
||||
if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
|
||||
put_user(ctx.optlen, optlen)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = ctx.retval;
|
||||
out:
|
||||
sockopt_free_buf(&ctx);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt);
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
return cgroup_dev_func_proto(func_id, prog);
|
||||
}
|
||||
|
||||
static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
|
||||
const struct bpf_prog *prog,
|
||||
struct bpf_insn_access_aux *info)
|
||||
{
|
||||
const int size_default = sizeof(__u32);
|
||||
|
||||
if (off < 0 || off + size > sizeof(struct bpf_sysctl) ||
|
||||
off % size || type != BPF_READ)
|
||||
return false;
|
||||
|
||||
switch (off) {
|
||||
case offsetof(struct bpf_sysctl, write):
|
||||
bpf_ctx_record_field_size(info, size_default);
|
||||
return bpf_ctx_narrow_access_ok(off, size, size_default);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
|
||||
const struct bpf_insn *si,
|
||||
struct bpf_insn *insn_buf,
|
||||
struct bpf_prog *prog, u32 *target_size)
|
||||
{
|
||||
struct bpf_insn *insn = insn_buf;
|
||||
|
||||
switch (si->off) {
|
||||
case offsetof(struct bpf_sysctl, write):
|
||||
*insn++ = BPF_LDX_MEM(
|
||||
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
|
||||
bpf_target_off(struct bpf_sysctl_kern, write,
|
||||
FIELD_SIZEOF(struct bpf_sysctl_kern,
|
||||
write),
|
||||
target_size));
|
||||
break;
|
||||
}
|
||||
|
||||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
|
||||
.get_func_proto = sysctl_func_proto,
|
||||
.is_valid_access = sysctl_is_valid_access,
|
||||
.convert_ctx_access = sysctl_convert_ctx_access,
|
||||
};
|
||||
|
||||
const struct bpf_prog_ops cg_sysctl_prog_ops = {
|
||||
};
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
switch (func_id) {
|
||||
case BPF_FUNC_sk_storage_get:
|
||||
return &bpf_sk_storage_get_proto;
|
||||
case BPF_FUNC_sk_storage_delete:
|
||||
return &bpf_sk_storage_delete_proto;
|
||||
#ifdef CONFIG_INET
|
||||
case BPF_FUNC_tcp_sock:
|
||||
return &bpf_tcp_sock_proto;
|
||||
#endif
|
||||
default:
|
||||
return cgroup_base_func_proto(func_id, prog);
|
||||
}
|
||||
}
|
||||
static bool cg_sockopt_is_valid_access(int off, int size,
|
||||
enum bpf_access_type type,
|
||||
const struct bpf_prog *prog,
|
||||
struct bpf_insn_access_aux *info)
|
||||
{
|
||||
const int size_default = sizeof(__u32);
|
||||
if (off < 0 || off >= sizeof(struct bpf_sockopt))
|
||||
return false;
|
||||
if (off % size != 0)
|
||||
return false;
|
||||
if (type == BPF_WRITE) {
|
||||
switch (off) {
|
||||
case offsetof(struct bpf_sockopt, retval):
|
||||
if (size != size_default)
|
||||
return false;
|
||||
return prog->expected_attach_type ==
|
||||
BPF_CGROUP_GETSOCKOPT;
|
||||
case offsetof(struct bpf_sockopt, optname):
|
||||
/* fallthrough */
|
||||
case offsetof(struct bpf_sockopt, level):
|
||||
if (size != size_default)
|
||||
return false;
|
||||
return prog->expected_attach_type ==
|
||||
BPF_CGROUP_SETSOCKOPT;
|
||||
case offsetof(struct bpf_sockopt, optlen):
|
||||
return size == size_default;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
switch (off) {
|
||||
case offsetof(struct bpf_sockopt, sk):
|
||||
if (size != sizeof(__u64))
|
||||
return false;
|
||||
info->reg_type = PTR_TO_SOCKET;
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, optval):
|
||||
if (size != sizeof(__u64))
|
||||
return false;
|
||||
info->reg_type = PTR_TO_PACKET;
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, optval_end):
|
||||
if (size != sizeof(__u64))
|
||||
return false;
|
||||
info->reg_type = PTR_TO_PACKET_END;
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, retval):
|
||||
if (size != size_default)
|
||||
return false;
|
||||
return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
|
||||
default:
|
||||
if (size != size_default)
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#define CG_SOCKOPT_ACCESS_FIELD(T, F) \
|
||||
T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \
|
||||
si->dst_reg, si->src_reg, \
|
||||
offsetof(struct bpf_sockopt_kern, F))
|
||||
static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
|
||||
const struct bpf_insn *si,
|
||||
struct bpf_insn *insn_buf,
|
||||
struct bpf_prog *prog,
|
||||
u32 *target_size)
|
||||
{
|
||||
struct bpf_insn *insn = insn_buf;
|
||||
switch (si->off) {
|
||||
case offsetof(struct bpf_sockopt, sk):
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk);
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, level):
|
||||
if (type == BPF_WRITE)
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level);
|
||||
else
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level);
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, optname):
|
||||
if (type == BPF_WRITE)
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname);
|
||||
else
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname);
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, optlen):
|
||||
if (type == BPF_WRITE)
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen);
|
||||
else
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, retval):
|
||||
if (type == BPF_WRITE)
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
|
||||
else
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, optval):
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, optval_end):
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end);
|
||||
break;
|
||||
}
|
||||
return insn - insn_buf;
|
||||
}
|
||||
static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
|
||||
bool direct_write,
|
||||
const struct bpf_prog *prog)
|
||||
{
|
||||
/* Nothing to do for sockopt argument. The data is kzalloc'ated.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
|
||||
.get_func_proto = cg_sockopt_func_proto,
|
||||
.is_valid_access = cg_sockopt_is_valid_access,
|
||||
.convert_ctx_access = cg_sockopt_convert_ctx_access,
|
||||
.gen_prologue = cg_sockopt_get_prologue,
|
||||
};
|
||||
const struct bpf_prog_ops cg_sockopt_prog_ops = {
|
||||
};
|
||||
|
||||
@@ -21,12 +21,14 @@
|
||||
* Kris Katterjohn - Added many additional checks in bpf_check_classic()
|
||||
*/
|
||||
|
||||
#include <uapi/linux/btf.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/moduleloader.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/frame.h>
|
||||
#include <linux/rbtree_latch.h>
|
||||
#include <linux/kallsyms.h>
|
||||
@@ -108,6 +110,80 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_prog_alloc);
|
||||
|
||||
int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
|
||||
{
|
||||
if (!prog->aux->nr_linfo || !prog->jit_requested)
|
||||
return 0;
|
||||
prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
|
||||
sizeof(*prog->aux->jited_linfo),
|
||||
GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!prog->aux->jited_linfo)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
void bpf_prog_free_jited_linfo(struct bpf_prog *prog)
|
||||
{
|
||||
kfree(prog->aux->jited_linfo);
|
||||
prog->aux->jited_linfo = NULL;
|
||||
}
|
||||
void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog)
|
||||
{
|
||||
if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0])
|
||||
bpf_prog_free_jited_linfo(prog);
|
||||
}
|
||||
/* The jit engine is responsible to provide an array
|
||||
* for insn_off to the jited_off mapping (insn_to_jit_off).
|
||||
*
|
||||
* The idx to this array is the insn_off. Hence, the insn_off
|
||||
* here is relative to the prog itself instead of the main prog.
|
||||
* This array has one entry for each xlated bpf insn.
|
||||
*
|
||||
* jited_off is the byte off to the last byte of the jited insn.
|
||||
*
|
||||
* Hence, with
|
||||
* insn_start:
|
||||
* The first bpf insn off of the prog. The insn off
|
||||
* here is relative to the main prog.
|
||||
* e.g. if prog is a subprog, insn_start > 0
|
||||
* linfo_idx:
|
||||
* The prog's idx to prog->aux->linfo and jited_linfo
|
||||
*
|
||||
* jited_linfo[linfo_idx] = prog->bpf_func
|
||||
*
|
||||
* For i > linfo_idx,
|
||||
*
|
||||
* jited_linfo[i] = prog->bpf_func +
|
||||
* insn_to_jit_off[linfo[i].insn_off - insn_start - 1]
|
||||
*/
|
||||
void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
|
||||
const u32 *insn_to_jit_off)
|
||||
{
|
||||
u32 linfo_idx, insn_start, insn_end, nr_linfo, i;
|
||||
const struct bpf_line_info *linfo;
|
||||
void **jited_linfo;
|
||||
if (!prog->aux->jited_linfo)
|
||||
/* Userspace did not provide linfo */
|
||||
return;
|
||||
linfo_idx = prog->aux->linfo_idx;
|
||||
linfo = &prog->aux->linfo[linfo_idx];
|
||||
insn_start = linfo[0].insn_off;
|
||||
insn_end = insn_start + prog->len;
|
||||
jited_linfo = &prog->aux->jited_linfo[linfo_idx];
|
||||
jited_linfo[0] = prog->bpf_func;
|
||||
nr_linfo = prog->aux->nr_linfo - linfo_idx;
|
||||
for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++)
|
||||
/* The verifier ensures that linfo[i].insn_off is
|
||||
* strictly increasing
|
||||
*/
|
||||
jited_linfo[i] = prog->bpf_func +
|
||||
insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
|
||||
}
|
||||
void bpf_prog_free_linfo(struct bpf_prog *prog)
|
||||
{
|
||||
bpf_prog_free_jited_linfo(prog);
|
||||
kvfree(prog->aux->linfo);
|
||||
}
|
||||
|
||||
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
|
||||
gfp_t gfp_extra_flags)
|
||||
{
|
||||
@@ -180,7 +256,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
|
||||
dst[i] = fp->insnsi[i];
|
||||
if (!was_ld_map &&
|
||||
dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
|
||||
dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
|
||||
(dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
|
||||
dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
|
||||
was_ld_map = true;
|
||||
dst[i].imm = 0;
|
||||
} else if (was_ld_map &&
|
||||
@@ -297,6 +374,22 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta)
|
||||
{
|
||||
struct bpf_line_info *linfo;
|
||||
u32 i, nr_linfo;
|
||||
nr_linfo = prog->aux->nr_linfo;
|
||||
if (!nr_linfo || !delta)
|
||||
return;
|
||||
linfo = prog->aux->linfo;
|
||||
for (i = 0; i < nr_linfo; i++)
|
||||
if (off < linfo[i].insn_off)
|
||||
break;
|
||||
/* Push all off < linfo[i].insn_off by delta */
|
||||
for (; i < nr_linfo; i++)
|
||||
linfo[i].insn_off += delta;
|
||||
}
|
||||
|
||||
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
|
||||
const struct bpf_insn *patch, u32 len)
|
||||
{
|
||||
@@ -352,6 +445,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
|
||||
*/
|
||||
BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false));
|
||||
|
||||
bpf_adj_linfo(prog_adj, off, insn_delta);
|
||||
|
||||
return prog_adj;
|
||||
}
|
||||
|
||||
@@ -394,6 +489,8 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
|
||||
static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
|
||||
{
|
||||
const char *end = sym + KSYM_NAME_LEN;
|
||||
const struct btf_type *type;
|
||||
const char *func_name;
|
||||
|
||||
BUILD_BUG_ON(sizeof("bpf_prog_") +
|
||||
sizeof(prog->tag) * 2 +
|
||||
@@ -408,6 +505,16 @@ static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
|
||||
|
||||
sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
|
||||
sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
|
||||
|
||||
/* prog->aux->name will be ignored if full btf name is available */
|
||||
if (prog->aux->btf) {
|
||||
type = btf_type_by_id(prog->aux->btf,
|
||||
prog->aux->func_info[prog->aux->func_idx].type_id);
|
||||
func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
|
||||
snprintf(sym, (size_t)(end - sym), "_%s", func_name);
|
||||
return;
|
||||
}
|
||||
|
||||
if (prog->aux->name[0])
|
||||
snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
|
||||
else
|
||||
@@ -1592,13 +1699,19 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
|
||||
* be JITed, but falls back to the interpreter.
|
||||
*/
|
||||
if (!bpf_prog_is_dev_bound(fp->aux)) {
|
||||
*err = bpf_prog_alloc_jited_linfo(fp);
|
||||
if (*err)
|
||||
return fp;
|
||||
fp = bpf_int_jit_compile(fp);
|
||||
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
|
||||
if (!fp->jited) {
|
||||
bpf_prog_free_jited_linfo(fp);
|
||||
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
|
||||
*err = -ENOTSUPP;
|
||||
return fp;
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
bpf_prog_free_unused_jited_linfo(fp);
|
||||
}
|
||||
} else {
|
||||
*err = bpf_prog_offload_compile(fp);
|
||||
if (*err)
|
||||
@@ -1679,6 +1792,15 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
|
||||
return cnt;
|
||||
}
|
||||
|
||||
bool bpf_prog_array_is_empty(struct bpf_prog_array *array)
|
||||
{
|
||||
struct bpf_prog_array_item *item;
|
||||
for (item = array->items; item->prog; item++)
|
||||
if (item->prog != &dummy_bpf_prog.prog)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
|
||||
u32 *prog_ids,
|
||||
@@ -1891,6 +2013,8 @@ BPF_CALL_0(bpf_user_rnd_u32)
|
||||
const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
|
||||
const struct bpf_func_proto bpf_map_update_elem_proto __weak;
|
||||
const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
|
||||
const struct bpf_func_proto bpf_spin_lock_proto __weak;
|
||||
const struct bpf_func_proto bpf_spin_unlock_proto __weak;
|
||||
|
||||
const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
|
||||
@@ -1901,8 +2025,6 @@ const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
|
||||
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
|
||||
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
|
||||
|
||||
|
||||
@@ -46,6 +46,12 @@
|
||||
* notifier hook walks the map we know that new dev references can not be
|
||||
* added by the user because core infrastructure ensures dev_get_by_index()
|
||||
* calls will fail at this point.
|
||||
*
|
||||
* The devmap_hash type is a map type which interprets keys as ifindexes and
|
||||
* indexes these using a hashmap. This allows maps that use ifindex as key to be
|
||||
* densely packed instead of having holes in the lookup array for unused
|
||||
* ifindexes. The setup and packet enqueue/send code is shared between the two
|
||||
* types of devmap; only the lookup and insertion is different.
|
||||
*/
|
||||
#include <linux/bpf.h>
|
||||
#include <net/xdp.h>
|
||||
@@ -64,6 +70,7 @@ struct xdp_bulk_queue {
|
||||
|
||||
struct bpf_dtab_netdev {
|
||||
struct net_device *dev; /* must be first member, due to tracepoint */
|
||||
struct hlist_node index_hlist;
|
||||
struct bpf_dtab *dtab;
|
||||
unsigned int bit;
|
||||
struct xdp_bulk_queue __percpu *bulkq;
|
||||
@@ -75,11 +82,30 @@ struct bpf_dtab {
|
||||
struct bpf_dtab_netdev **netdev_map;
|
||||
unsigned long __percpu *flush_needed;
|
||||
struct list_head list;
|
||||
|
||||
/* these are only used for DEVMAP_HASH type maps */
|
||||
struct hlist_head *dev_index_head;
|
||||
spinlock_t index_lock;
|
||||
unsigned int items;
|
||||
u32 n_buckets;
|
||||
};
|
||||
|
||||
static DEFINE_SPINLOCK(dev_map_lock);
|
||||
static LIST_HEAD(dev_map_list);
|
||||
|
||||
static struct hlist_head *dev_map_create_hash(unsigned int entries)
|
||||
{
|
||||
int i;
|
||||
struct hlist_head *hash;
|
||||
|
||||
hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
|
||||
if (hash != NULL)
|
||||
for (i = 0; i < entries; i++)
|
||||
INIT_HLIST_HEAD(&hash[i]);
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
static u64 dev_map_bitmap_size(const union bpf_attr *attr)
|
||||
{
|
||||
return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
|
||||
@@ -99,6 +125,11 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
|
||||
attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/* Lookup returns a pointer straight to dev->ifindex, so make sure the
|
||||
* verifier prevents writes from the BPF side
|
||||
*/
|
||||
attr->map_flags |= BPF_F_RDONLY_PROG;
|
||||
|
||||
dtab = kzalloc(sizeof(*dtab), GFP_USER);
|
||||
if (!dtab)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
@@ -113,6 +144,16 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
|
||||
|
||||
dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
|
||||
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
|
||||
dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
|
||||
|
||||
if (!dtab->n_buckets) { /* Overflow check */
|
||||
err = -EINVAL;
|
||||
goto free_dtab;
|
||||
}
|
||||
cost += sizeof(struct hlist_head) * dtab->n_buckets;
|
||||
}
|
||||
|
||||
/* if map size is larger than memlock limit, reject it early */
|
||||
err = bpf_map_precharge_memlock(dtab->map.pages);
|
||||
if (err)
|
||||
@@ -133,13 +174,24 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
|
||||
if (!dtab->netdev_map)
|
||||
goto free_dtab;
|
||||
|
||||
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
|
||||
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
|
||||
if (!dtab->dev_index_head)
|
||||
goto free_map_area;
|
||||
|
||||
spin_lock_init(&dtab->index_lock);
|
||||
}
|
||||
|
||||
spin_lock(&dev_map_lock);
|
||||
list_add_tail_rcu(&dtab->list, &dev_map_list);
|
||||
spin_unlock(&dev_map_lock);
|
||||
|
||||
return &dtab->map;
|
||||
free_map_area:
|
||||
bpf_map_area_free(dtab->netdev_map);
|
||||
free_dtab:
|
||||
free_percpu(dtab->flush_needed);
|
||||
kfree(dtab->dev_index_head);
|
||||
kfree(dtab);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
@@ -193,6 +245,7 @@ static void dev_map_free(struct bpf_map *map)
|
||||
|
||||
free_percpu(dtab->flush_needed);
|
||||
bpf_map_area_free(dtab->netdev_map);
|
||||
kfree(dtab->dev_index_head);
|
||||
kfree(dtab);
|
||||
}
|
||||
|
||||
@@ -213,6 +266,77 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
|
||||
int idx)
|
||||
{
|
||||
return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
|
||||
}
|
||||
|
||||
static struct bpf_dtab_netdev *__dev_map_hash_lookup_elem_dtab(struct bpf_map *map, u32 key)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
struct hlist_head *head = dev_map_index_hash(dtab, key);
|
||||
struct bpf_dtab_netdev *dev;
|
||||
|
||||
hlist_for_each_entry_rcu(dev, head, index_hlist)
|
||||
if (dev->bit == key)
|
||||
return dev;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
|
||||
{
|
||||
struct bpf_dtab_netdev *dev = __dev_map_hash_lookup_elem_dtab(map, key);
|
||||
|
||||
return dev ? dev : NULL;
|
||||
}
|
||||
|
||||
static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
|
||||
void *next_key)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
u32 idx, *next = next_key;
|
||||
struct bpf_dtab_netdev *dev, *next_dev;
|
||||
struct hlist_head *head;
|
||||
int i = 0;
|
||||
|
||||
if (!key)
|
||||
goto find_first;
|
||||
|
||||
idx = *(u32 *)key;
|
||||
|
||||
dev = __dev_map_hash_lookup_elem_dtab(map, idx);
|
||||
if (!dev)
|
||||
goto find_first;
|
||||
|
||||
next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)),
|
||||
struct bpf_dtab_netdev, index_hlist);
|
||||
|
||||
if (next_dev) {
|
||||
*next = next_dev->bit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
i = idx & (dtab->n_buckets - 1);
|
||||
i++;
|
||||
|
||||
find_first:
|
||||
for (; i < dtab->n_buckets; i++) {
|
||||
head = dev_map_index_hash(dtab, i);
|
||||
|
||||
next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
|
||||
struct bpf_dtab_netdev,
|
||||
index_hlist);
|
||||
if (next_dev) {
|
||||
*next = next_dev->bit;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
@@ -383,6 +507,14 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
|
||||
return dev ? &dev->ifindex : NULL;
|
||||
}
|
||||
|
||||
static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map, *(u32 *)key);
|
||||
struct net_device *dev = obj ? obj->dev : NULL;
|
||||
|
||||
return dev ? &dev->ifindex : NULL;
|
||||
}
|
||||
|
||||
static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
|
||||
{
|
||||
if (dev->dev->netdev_ops->ndo_xdp_xmit) {
|
||||
@@ -437,12 +569,57 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
struct bpf_dtab_netdev *old_dev;
|
||||
int k = *(u32 *)key;
|
||||
unsigned long flags;
|
||||
int ret = -ENOENT;
|
||||
|
||||
spin_lock_irqsave(&dtab->index_lock, flags);
|
||||
|
||||
old_dev = __dev_map_hash_lookup_elem_dtab(map, k);
|
||||
if (old_dev) {
|
||||
dtab->items--;
|
||||
hlist_del_init_rcu(&old_dev->index_hlist);
|
||||
call_rcu(&old_dev->rcu, __dev_map_entry_free);
|
||||
ret = 0;
|
||||
}
|
||||
spin_unlock_irqrestore(&dtab->index_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
|
||||
struct bpf_dtab *dtab,
|
||||
u32 ifindex,
|
||||
unsigned int idx)
|
||||
{
|
||||
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
|
||||
struct bpf_dtab_netdev *dev;
|
||||
|
||||
dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
|
||||
if (!dev)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
dev->dev = dev_get_by_index(net, ifindex);
|
||||
if (!dev->dev) {
|
||||
kfree(dev);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
dev->bit = idx;
|
||||
dev->dtab = dtab;
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
u64 map_flags)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
struct net *net = current->nsproxy->net_ns;
|
||||
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
|
||||
struct bpf_dtab_netdev *dev, *old_dev;
|
||||
u32 i = *(u32 *)key;
|
||||
u32 ifindex = *(u32 *)value;
|
||||
@@ -457,26 +634,9 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
if (!ifindex) {
|
||||
dev = NULL;
|
||||
} else {
|
||||
dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
|
||||
if (!dev)
|
||||
return -ENOMEM;
|
||||
|
||||
dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
|
||||
sizeof(void *), gfp);
|
||||
if (!dev->bulkq) {
|
||||
kfree(dev);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dev->dev = dev_get_by_index(net, ifindex);
|
||||
if (!dev->dev) {
|
||||
free_percpu(dev->bulkq);
|
||||
kfree(dev);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
dev->bit = i;
|
||||
dev->dtab = dtab;
|
||||
dev = __dev_map_alloc_node(net, dtab, ifindex, i);
|
||||
if (IS_ERR(dev))
|
||||
return PTR_ERR(dev);
|
||||
}
|
||||
|
||||
/* Use call_rcu() here to ensure rcu critical sections have completed
|
||||
@@ -490,6 +650,56 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
|
||||
void *key, void *value, u64 map_flags)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
struct bpf_dtab_netdev *dev, *old_dev;
|
||||
u32 ifindex = *(u32 *)value;
|
||||
u32 idx = *(u32 *)key;
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(map_flags > BPF_EXIST || !ifindex))
|
||||
return -EINVAL;
|
||||
|
||||
old_dev = __dev_map_hash_lookup_elem_dtab(map, idx);
|
||||
if (old_dev && (map_flags & BPF_NOEXIST))
|
||||
return -EEXIST;
|
||||
|
||||
dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
|
||||
if (IS_ERR(dev))
|
||||
return PTR_ERR(dev);
|
||||
|
||||
spin_lock_irqsave(&dtab->index_lock, flags);
|
||||
|
||||
if (old_dev) {
|
||||
hlist_del_rcu(&old_dev->index_hlist);
|
||||
} else {
|
||||
if (dtab->items >= dtab->map.max_entries) {
|
||||
spin_unlock_irqrestore(&dtab->index_lock, flags);
|
||||
call_rcu(&dev->rcu, __dev_map_entry_free);
|
||||
return -E2BIG;
|
||||
}
|
||||
dtab->items++;
|
||||
}
|
||||
|
||||
hlist_add_head_rcu(&dev->index_hlist,
|
||||
dev_map_index_hash(dtab, idx));
|
||||
spin_unlock_irqrestore(&dtab->index_lock, flags);
|
||||
|
||||
if (old_dev)
|
||||
call_rcu(&old_dev->rcu, __dev_map_entry_free);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
u64 map_flags)
|
||||
{
|
||||
return __dev_map_hash_update_elem(current->nsproxy->net_ns,
|
||||
map, key, value, map_flags);
|
||||
}
|
||||
|
||||
const struct bpf_map_ops dev_map_ops = {
|
||||
.map_alloc = dev_map_alloc,
|
||||
.map_free = dev_map_free,
|
||||
@@ -500,6 +710,15 @@ const struct bpf_map_ops dev_map_ops = {
|
||||
.map_check_btf = map_check_no_btf,
|
||||
};
|
||||
|
||||
const struct bpf_map_ops dev_map_hash_ops = {
|
||||
.map_alloc = dev_map_alloc,
|
||||
.map_free = dev_map_free,
|
||||
.map_get_next_key = dev_map_hash_get_next_key,
|
||||
.map_lookup_elem = dev_map_hash_lookup_elem,
|
||||
.map_update_elem = dev_map_hash_update_elem,
|
||||
.map_delete_elem = dev_map_hash_delete_elem,
|
||||
};
|
||||
|
||||
static int dev_map_notification(struct notifier_block *notifier,
|
||||
ulong event, void *ptr)
|
||||
{
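
As a side note on the devmap changes above: the sketch below shows, purely for illustration, how an XDP program might drive the new BPF_MAP_TYPE_DEVMAP_HASH, keying redirects by ifindex as described in the devmap_hash comment earlier in this file's diff. The map name, sizes, section names and zero flags value are assumptions made for the example, not part of this series.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
	__uint(key_size, sizeof(__u32));	/* arbitrary u32 key, here: ingress ifindex */
	__uint(value_size, sizeof(__u32));	/* ifindex of the egress device */
	__uint(max_entries, 64);
} tx_port SEC(".maps");

SEC("xdp")
int redirect_by_ifindex(struct xdp_md *ctx)
{
	/* __dev_map_hash_lookup_elem() resolves the key through the new
	 * dev_index_head hash table instead of indexing a flat array.
	 */
	return bpf_redirect_map(&tx_port, ctx->ingress_ifindex, 0);
}

char _license[] SEC("license") = "GPL";
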
@@ -208,10 +208,11 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
		 * part of the ldimm64 insn is accessible.
		 */
		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
		bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
		bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD ||
			      insn->src_reg == BPF_PSEUDO_MAP_VALUE;
		char tmp[64];

		if (map_ptr && !allow_ptr_leaks)
		if (is_ptr && !allow_ptr_leaks)
			imm = 0;

		verbose(cbs->private_data, "(%02x) r%d = %s\n",

@@ -725,21 +725,12 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
|
||||
BITS_PER_LONG == 64;
|
||||
}
|
||||
|
||||
static u32 htab_size_value(const struct bpf_htab *htab, bool percpu)
|
||||
{
|
||||
u32 size = htab->map.value_size;
|
||||
|
||||
if (percpu || fd_htab_map_needs_adjust(htab))
|
||||
size = round_up(size, 8);
|
||||
return size;
|
||||
}
|
||||
|
||||
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
||||
void *value, u32 key_size, u32 hash,
|
||||
bool percpu, bool onallcpus,
|
||||
struct htab_elem *old_elem)
|
||||
{
|
||||
u32 size = htab_size_value(htab, percpu);
|
||||
u32 size = htab->map.value_size;
|
||||
bool prealloc = htab_is_prealloc(htab);
|
||||
struct htab_elem *l_new, **pl_new;
|
||||
void __percpu *pptr;
|
||||
@@ -778,10 +769,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
||||
l_new = ERR_PTR(-ENOMEM);
|
||||
goto dec_count;
|
||||
}
|
||||
check_and_init_map_lock(&htab->map,
|
||||
l_new->key + round_up(key_size, 8));
|
||||
}
|
||||
|
||||
memcpy(l_new->key, key, key_size);
|
||||
if (percpu) {
|
||||
size = round_up(size, 8);
|
||||
if (prealloc) {
|
||||
pptr = htab_elem_get_ptr(l_new, key_size);
|
||||
} else {
|
||||
@@ -799,8 +793,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
||||
|
||||
if (!prealloc)
|
||||
htab_elem_set_ptr(l_new, key_size, pptr);
|
||||
} else {
|
||||
} else if (fd_htab_map_needs_adjust(htab)) {
|
||||
size = round_up(size, 8);
|
||||
memcpy(l_new->key + round_up(key_size, 8), value, size);
|
||||
} else {
|
||||
copy_map_value(&htab->map,
|
||||
l_new->key + round_up(key_size, 8),
|
||||
value);
|
||||
}
|
||||
|
||||
l_new->hash = hash;
|
||||
@@ -813,11 +812,11 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
||||
static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
|
||||
u64 map_flags)
|
||||
{
|
||||
if (l_old && map_flags == BPF_NOEXIST)
|
||||
if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
|
||||
/* elem already exists */
|
||||
return -EEXIST;
|
||||
|
||||
if (!l_old && map_flags == BPF_EXIST)
|
||||
if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
|
||||
/* elem doesn't exist, cannot update it */
|
||||
return -ENOENT;
|
||||
|
||||
@@ -858,6 +857,20 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
|
||||
/* first lookup without the bucket lock didn't find the element,
|
||||
* but second lookup with the bucket lock found it.
|
||||
* This case is highly unlikely, but has to be dealt with:
|
||||
* grab the element lock in addition to the bucket lock
|
||||
* and update element in place
|
||||
*/
|
||||
copy_map_value_locked(map,
|
||||
l_old->key + round_up(key_size, 8),
|
||||
value, false);
|
||||
ret = 0;
|
||||
goto err;
|
||||
}
|
||||
|
||||
l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
|
||||
l_old);
|
||||
if (IS_ERR(l_new)) {
|
||||
@@ -970,6 +983,31 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
|
||||
b = __select_bucket(htab, hash);
|
||||
head = &b->head;
|
||||
|
||||
if (unlikely(map_flags & BPF_F_LOCK)) {
|
||||
if (unlikely(!map_value_has_spin_lock(map)))
|
||||
return -EINVAL;
|
||||
|
||||
/* find an element without taking the bucket lock */
|
||||
l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
|
||||
htab->n_buckets);
|
||||
ret = check_flags(htab, l_old, map_flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (l_old) {
|
||||
/* grab the element lock and update value in place */
|
||||
copy_map_value_locked(map,
|
||||
l_old->key + round_up(key_size, 8),
|
||||
value, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* fall through, grab the bucket lock and lookup again.
|
||||
* 99.9% chance that the element won't be found,
|
||||
* but second lookup under lock has to be done.
|
||||
*/
|
||||
}
|
||||
|
||||
/* bpf_map_update_elem() can be called in_irq() */
|
||||
raw_spin_lock_irqsave(&b->lock, flags);
|
||||
|
||||
@@ -1010,7 +1048,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
|
||||
u32 key_size, hash;
|
||||
int ret;
|
||||
|
||||
if (unlikely(map_flags > BPF_EXIST))
|
||||
if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
|
||||
/* unknown flags */
|
||||
return -EINVAL;
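
For context on the BPF_F_LOCK handling added to htab_map_update_elem() above, a hypothetical user-space fragment that would exercise this path is sketched below. The libbpf call and the flag are real; the value layout, map fd and function name are assumptions made for the sketch.

#include <errno.h>
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

/* Value layout shared with the BPF program: the spin lock word comes
 * first, the data protected by it afterwards.
 */
struct counter_val {
	__u32 lock;		/* stands in for struct bpf_spin_lock */
	__s64 packets;
};

static int reset_counter(int map_fd, __u32 key)
{
	struct counter_val v;

	memset(&v, 0, sizeof(v));
	/* BPF_F_LOCK asks the kernel to take the element's bpf_spin_lock and
	 * copy everything except the lock word (copy_map_value_locked()), so
	 * concurrent lock holders never observe a torn value.
	 */
	if (bpf_map_update_elem(map_fd, &key, &v, BPF_F_LOCK))
		return -errno;
	return 0;
}
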
@@ -192,6 +192,91 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
|
||||
.arg2_type = ARG_CONST_SIZE,
|
||||
};
|
||||
|
||||
#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;
	union {
		__u32 val;
		arch_spinlock_t lock;
	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
	arch_spin_lock(l);
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;
	arch_spin_unlock(l);
}

#else
static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;
	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
	do {
		atomic_cond_read_relaxed(l, !VAL);
	} while (atomic_xchg(l, 1));
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;
	atomic_set_release(l, 0);
}
#endif

static DEFINE_PER_CPU(unsigned long, irqsave_flags);
|
||||
notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
local_irq_save(flags);
|
||||
__bpf_spin_lock(lock);
|
||||
__this_cpu_write(irqsave_flags, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_spin_lock_proto = {
|
||||
.func = bpf_spin_lock,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_VOID,
|
||||
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
|
||||
};
|
||||
|
||||
notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
flags = __this_cpu_read(irqsave_flags);
|
||||
__bpf_spin_unlock(lock);
|
||||
local_irq_restore(flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_spin_unlock_proto = {
|
||||
.func = bpf_spin_unlock,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_VOID,
|
||||
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
|
||||
};
|
||||
|
||||
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
|
||||
bool lock_src)
|
||||
{
|
||||
struct bpf_spin_lock *lock;
|
||||
if (lock_src)
|
||||
lock = src + map->spin_lock_off;
|
||||
else
|
||||
lock = dst + map->spin_lock_off;
|
||||
preempt_disable();
|
||||
____bpf_spin_lock(lock);
|
||||
copy_map_value(map, dst, src);
|
||||
____bpf_spin_unlock(lock);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUPS
|
||||
BPF_CALL_0(bpf_get_current_cgroup_id)
|
||||
{
|
||||
@@ -206,16 +291,25 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU(void*, bpf_cgroup_storage);
|
||||
|
||||
#ifdef CONFIG_CGROUP_BPF
|
||||
DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
|
||||
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
|
||||
{
|
||||
/* map and flags arguments are not used now,
|
||||
* but provide an ability to extend the API
|
||||
* for other types of local storages.
|
||||
* verifier checks that their values are correct.
|
||||
/* flags argument is not used now,
|
||||
* but provides an ability to extend the API.
|
||||
* verifier checks that its value is correct.
|
||||
*/
|
||||
return (unsigned long) this_cpu_read(bpf_cgroup_storage);
|
||||
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
|
||||
struct bpf_cgroup_storage *storage;
|
||||
void *ptr;
|
||||
|
||||
storage = this_cpu_read(bpf_cgroup_storage[stype]);
|
||||
|
||||
if (stype == BPF_CGROUP_STORAGE_SHARED)
|
||||
ptr = &READ_ONCE(storage->buf)->data[0];
|
||||
else
|
||||
ptr = this_cpu_ptr(storage->percpu_buf);
|
||||
return (unsigned long)ptr;
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_get_local_storage_proto = {
|
||||
@@ -226,3 +320,4 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
#endif
|
||||
#endif
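
To show how the two spin-lock helpers above are intended to be used from a program, here is a minimal, illustrative sketch. The map definition, section name and field names are assumptions for the example and are not taken from this patch set.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct counter_val {
	struct bpf_spin_lock lock;	/* located via btf_find_spin_lock() */
	__s64 packets;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, struct counter_val);
} counters SEC(".maps");

SEC("xdp")
int count_packets(struct xdp_md *ctx)
{
	__u32 key = 0;
	struct counter_val *v;

	v = bpf_map_lookup_elem(&counters, &key);
	if (!v)
		return XDP_PASS;

	/* bpf_spin_lock() disables IRQs and takes the per-value lock, so the
	 * read-modify-write below cannot race with bpf_map_update_elem()
	 * calls that pass BPF_F_LOCK.
	 */
	bpf_spin_lock(&v->lock);
	v->packets++;
	bpf_spin_unlock(&v->lock);
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
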
@@ -1,13 +1,15 @@
|
||||
//SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/bpf-cgroup.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/slab.h>
|
||||
#include <uapi/linux/btf.h>
|
||||
|
||||
DEFINE_PER_CPU(void*, bpf_cgroup_storage);
|
||||
DEFINE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
|
||||
|
||||
#ifdef CONFIG_CGROUP_BPF
|
||||
|
||||
@@ -129,7 +131,14 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
|
||||
struct bpf_cgroup_storage *storage;
|
||||
struct bpf_storage_buffer *new;
|
||||
|
||||
if (flags != BPF_ANY && flags != BPF_EXIST)
|
||||
if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely(flags & BPF_NOEXIST))
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely((flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)))
|
||||
return -EINVAL;
|
||||
|
||||
storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
|
||||
@@ -137,6 +146,11 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
|
||||
if (!storage)
|
||||
return -ENOENT;
|
||||
|
||||
if (flags & BPF_F_LOCK) {
|
||||
copy_map_value_locked(map, storage->buf->data, value, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
|
||||
map->value_size,
|
||||
__GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
|
||||
@@ -152,6 +166,66 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
|
||||
void *value)
|
||||
{
|
||||
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
|
||||
struct bpf_cgroup_storage_key *key = _key;
|
||||
struct bpf_cgroup_storage *storage;
|
||||
int cpu, off = 0;
|
||||
u32 size;
|
||||
rcu_read_lock();
|
||||
storage = cgroup_storage_lookup(map, key, false);
|
||||
if (!storage) {
|
||||
rcu_read_unlock();
|
||||
return -ENOENT;
|
||||
}
|
||||
/* per_cpu areas are zero-filled and bpf programs can only
|
||||
* access 'value_size' of them, so copying rounded areas
|
||||
* will not leak any kernel data
|
||||
*/
|
||||
size = round_up(_map->value_size, 8);
|
||||
for_each_possible_cpu(cpu) {
|
||||
bpf_long_memcpy(value + off,
|
||||
per_cpu_ptr(storage->percpu_buf, cpu), size);
|
||||
off += size;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
|
||||
void *value, u64 map_flags)
|
||||
{
|
||||
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
|
||||
struct bpf_cgroup_storage_key *key = _key;
|
||||
struct bpf_cgroup_storage *storage;
|
||||
int cpu, off = 0;
|
||||
u32 size;
|
||||
if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
|
||||
return -EINVAL;
|
||||
rcu_read_lock();
|
||||
storage = cgroup_storage_lookup(map, key, false);
|
||||
if (!storage) {
|
||||
rcu_read_unlock();
|
||||
return -ENOENT;
|
||||
}
|
||||
/* the user space will provide round_up(value_size, 8) bytes that
|
||||
* will be copied into per-cpu area. bpf programs can only access
|
||||
* value_size of it. During lookup the same extra bytes will be
|
||||
* returned or zeros which were zero-filled by percpu_alloc,
|
||||
* so no kernel data leaks possible
|
||||
*/
|
||||
size = round_up(_map->value_size, 8);
|
||||
for_each_possible_cpu(cpu) {
|
||||
bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
|
||||
value + off, size);
|
||||
off += size;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
|
||||
void *_next_key)
|
||||
{
|
||||
@@ -243,6 +317,84 @@ static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int cgroup_storage_check_btf(const struct bpf_map *map,
|
||||
const struct btf *btf,
|
||||
const struct btf_type *key_type,
|
||||
const struct btf_type *value_type)
|
||||
{
|
||||
struct btf_member *m;
|
||||
u32 offset, size;
|
||||
|
||||
/* Key is expected to be of struct bpf_cgroup_storage_key type,
|
||||
* which is:
|
||||
* struct bpf_cgroup_storage_key {
|
||||
* __u64 cgroup_inode_id;
|
||||
* __u32 attach_type;
|
||||
* };
|
||||
*/
|
||||
/*
|
||||
* Key_type must be a structure with two fields.
|
||||
*/
|
||||
if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
|
||||
BTF_INFO_VLEN(key_type->info) != 2)
|
||||
return -EINVAL;
|
||||
/*
|
||||
* The first field must be a 64 bit integer at 0 offset.
|
||||
*/
|
||||
m = (struct btf_member *)(key_type + 1);
|
||||
size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, cgroup_inode_id);
|
||||
if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
|
||||
return -EINVAL;
|
||||
/*
|
||||
* The second field must be a 32 bit integer at 64 bit offset.
|
||||
*/
|
||||
m++;
|
||||
offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
|
||||
size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, attach_type);
|
||||
|
||||
if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key,
|
||||
struct seq_file *m)
|
||||
{
|
||||
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
|
||||
struct bpf_cgroup_storage_key *key = _key;
|
||||
struct bpf_cgroup_storage *storage;
|
||||
int cpu;
|
||||
rcu_read_lock();
|
||||
storage = cgroup_storage_lookup(map_to_storage(map), key, false);
|
||||
|
||||
if (!storage) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
|
||||
stype = cgroup_storage_type(map);
|
||||
|
||||
if (stype == BPF_CGROUP_STORAGE_SHARED) {
|
||||
seq_puts(m, ": ");
|
||||
btf_type_seq_show(map->btf, map->btf_value_type_id,
|
||||
&READ_ONCE(storage->buf)->data[0], m);
|
||||
seq_puts(m, "\n");
|
||||
} else {
|
||||
seq_puts(m, ": {\n");
|
||||
for_each_possible_cpu(cpu) {
|
||||
seq_printf(m, "\tcpu%d: ", cpu);
|
||||
btf_type_seq_show(map->btf, map->btf_value_type_id,
|
||||
per_cpu_ptr(storage->percpu_buf, cpu),
|
||||
m);
|
||||
seq_puts(m, "\n");
|
||||
}
|
||||
seq_puts(m, "}\n");
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
const struct bpf_map_ops cgroup_storage_map_ops = {
|
||||
.map_alloc = cgroup_storage_map_alloc,
|
||||
.map_free = cgroup_storage_map_free,
|
||||
@@ -250,11 +402,13 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
|
||||
.map_lookup_elem = cgroup_storage_lookup_elem,
|
||||
.map_update_elem = cgroup_storage_update_elem,
|
||||
.map_delete_elem = cgroup_storage_delete_elem,
|
||||
.map_check_btf = map_check_no_btf,
|
||||
.map_check_btf = cgroup_storage_check_btf,
|
||||
.map_seq_show_elem = cgroup_storage_seq_show_elem,
|
||||
};
|
||||
|
||||
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
|
||||
{
|
||||
enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
|
||||
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
|
||||
int ret = -EBUSY;
|
||||
|
||||
@@ -262,11 +416,12 @@ int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
|
||||
|
||||
if (map->prog && map->prog != prog)
|
||||
goto unlock;
|
||||
if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
|
||||
if (prog->aux->cgroup_storage[stype] &&
|
||||
prog->aux->cgroup_storage[stype] != _map)
|
||||
goto unlock;
|
||||
|
||||
map->prog = prog;
|
||||
prog->aux->cgroup_storage = _map;
|
||||
prog->aux->cgroup_storage[stype] = _map;
|
||||
ret = 0;
|
||||
unlock:
|
||||
spin_unlock_bh(&map->lock);
|
||||
@@ -276,70 +431,110 @@ int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
|
||||
|
||||
void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
|
||||
{
|
||||
enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
|
||||
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
|
||||
|
||||
spin_lock_bh(&map->lock);
|
||||
if (map->prog == prog) {
|
||||
WARN_ON(prog->aux->cgroup_storage != _map);
|
||||
WARN_ON(prog->aux->cgroup_storage[stype] != _map);
|
||||
map->prog = NULL;
|
||||
prog->aux->cgroup_storage = NULL;
|
||||
prog->aux->cgroup_storage[stype] = NULL;
|
||||
}
|
||||
spin_unlock_bh(&map->lock);
|
||||
}
|
||||
|
||||
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
|
||||
static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
|
||||
{
|
||||
size_t size;
|
||||
if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
|
||||
size = sizeof(struct bpf_storage_buffer) + map->value_size;
|
||||
*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
|
||||
PAGE_SIZE) >> PAGE_SHIFT;
|
||||
} else {
|
||||
size = map->value_size;
|
||||
*pages = round_up(round_up(size, 8) * num_possible_cpus(),
|
||||
PAGE_SIZE) >> PAGE_SHIFT;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
|
||||
enum bpf_cgroup_storage_type stype)
|
||||
{
|
||||
struct bpf_cgroup_storage *storage;
|
||||
struct bpf_map *map;
|
||||
gfp_t flags;
|
||||
size_t size;
|
||||
u32 pages;
|
||||
|
||||
map = prog->aux->cgroup_storage;
|
||||
map = prog->aux->cgroup_storage[stype];
|
||||
if (!map)
|
||||
return NULL;
|
||||
|
||||
pages = round_up(sizeof(struct bpf_cgroup_storage) +
|
||||
sizeof(struct bpf_storage_buffer) +
|
||||
map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
size = bpf_cgroup_storage_calculate_size(map, &pages);
|
||||
if (bpf_map_charge_memlock(map, pages))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
|
||||
__GFP_ZERO | GFP_USER, map->numa_node);
|
||||
if (!storage) {
|
||||
bpf_map_uncharge_memlock(map, pages);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
|
||||
map->value_size, __GFP_ZERO | GFP_USER,
|
||||
map->numa_node);
|
||||
if (!storage->buf) {
|
||||
bpf_map_uncharge_memlock(map, pages);
|
||||
kfree(storage);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
if (!storage)
|
||||
goto enomem;
|
||||
|
||||
flags = __GFP_ZERO | GFP_USER;
|
||||
if (stype == BPF_CGROUP_STORAGE_SHARED) {
|
||||
storage->buf = kmalloc_node(size, flags, map->numa_node);
|
||||
if (!storage->buf)
|
||||
goto enomem;
|
||||
} else {
|
||||
storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
|
||||
if (!storage->percpu_buf)
|
||||
goto enomem;
|
||||
}
|
||||
|
||||
storage->map = (struct bpf_cgroup_storage_map *)map;
|
||||
|
||||
return storage;
|
||||
enomem:
|
||||
bpf_map_uncharge_memlock(map, pages);
|
||||
kfree(storage);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct bpf_cgroup_storage *storage =
|
||||
container_of(rcu, struct bpf_cgroup_storage, rcu);
|
||||
kfree(storage->buf);
|
||||
kfree(storage);
|
||||
}
|
||||
|
||||
static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct bpf_cgroup_storage *storage =
|
||||
container_of(rcu, struct bpf_cgroup_storage, rcu);
|
||||
free_percpu(storage->percpu_buf);
|
||||
kfree(storage);
|
||||
}
|
||||
|
||||
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
|
||||
{
|
||||
u32 pages;
|
||||
enum bpf_cgroup_storage_type stype;
|
||||
struct bpf_map *map;
|
||||
u32 pages;
|
||||
|
||||
if (!storage)
|
||||
return;
|
||||
|
||||
map = &storage->map->map;
|
||||
pages = round_up(sizeof(struct bpf_cgroup_storage) +
|
||||
sizeof(struct bpf_storage_buffer) +
|
||||
map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
bpf_cgroup_storage_calculate_size(map, &pages);
|
||||
bpf_map_uncharge_memlock(map, pages);
|
||||
|
||||
kfree_rcu(storage->buf, rcu);
|
||||
kfree_rcu(storage, rcu);
|
||||
stype = cgroup_storage_type(map);
|
||||
if (stype == BPF_CGROUP_STORAGE_SHARED)
|
||||
call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
|
||||
else
|
||||
call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
|
||||
}
|
||||
|
||||
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
|
||||
|
||||
@@ -698,6 +698,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
|
||||
}
|
||||
|
||||
static int trie_check_btf(const struct bpf_map *map,
|
||||
const struct btf *btf,
|
||||
const struct btf_type *key_type,
|
||||
const struct btf_type *value_type)
|
||||
{
|
||||
|
||||
@@ -36,6 +36,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if (map_value_has_spin_lock(inner_map)) {
|
||||
fdput(f);
|
||||
return ERR_PTR(-ENOTSUPP);
|
||||
}
|
||||
|
||||
inner_map_meta_size = sizeof(*inner_map_meta);
|
||||
/* In some cases verifier needs to access beyond just base map. */
|
||||
if (inner_map->ops == &array_map_ops)
|
||||
|
||||
@@ -270,14 +270,14 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
|
||||
struct inode *ns_inode;
|
||||
struct path ns_path;
|
||||
char __user *uinsns;
|
||||
void *res;
|
||||
int res;
|
||||
u32 ulen;
|
||||
|
||||
res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args);
|
||||
if (IS_ERR(res)) {
|
||||
if (res) {
|
||||
if (!info->ifindex)
|
||||
return -ENODEV;
|
||||
return PTR_ERR(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
down_read(&bpf_devs_lock);
|
||||
@@ -494,13 +494,12 @@ int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
|
||||
};
|
||||
struct inode *ns_inode;
|
||||
struct path ns_path;
|
||||
void *res;
|
||||
|
||||
int res;
|
||||
res = ns_get_path_cb(&ns_path, bpf_map_offload_info_fill_ns, &args);
|
||||
if (IS_ERR(res)) {
|
||||
if (res) {
|
||||
if (!info->ifindex)
|
||||
return -ENODEV;
|
||||
return PTR_ERR(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
ns_inode = ns_path.dentry->d_inode;
|
||||
|
||||

kernel/bpf/sockmap.c: 2631 changed lines (diff suppressed because it is too large)

@@ -458,13 +458,14 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
|
||||
}
|
||||
|
||||
int map_check_no_btf(const struct bpf_map *map,
|
||||
const struct btf *btf,
|
||||
const struct btf_type *key_type,
|
||||
const struct btf_type *value_type)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
|
||||
static int map_check_btf(struct bpf_map *map, const struct btf *btf,
|
||||
u32 btf_key_id, u32 btf_value_id)
|
||||
{
|
||||
const struct btf_type *key_type, *value_type;
|
||||
@@ -479,8 +480,24 @@ static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
|
||||
if (!value_type || value_size != map->value_size)
|
||||
return -EINVAL;
|
||||
|
||||
map->spin_lock_off = btf_find_spin_lock(btf, value_type);
|
||||
|
||||
if (map_value_has_spin_lock(map)) {
|
||||
if (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_ARRAY &&
|
||||
map->map_type != BPF_MAP_TYPE_SK_STORAGE)
|
||||
return -ENOTSUPP;
|
||||
if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
|
||||
map->value_size) {
|
||||
WARN_ONCE(1,
|
||||
"verifier bug spin_lock_off %d value_size %d\n",
|
||||
map->spin_lock_off, map->value_size);
|
||||
return -EFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
if (map->ops->map_check_btf)
|
||||
ret = map->ops->map_check_btf(map, key_type, value_type);
|
||||
ret = map->ops->map_check_btf(map, btf, key_type, value_type);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -543,6 +560,8 @@ static int map_create(union bpf_attr *attr)
|
||||
map->btf = btf;
|
||||
map->btf_key_type_id = attr->btf_key_type_id;
|
||||
map->btf_value_type_id = attr->btf_value_type_id;
|
||||
} else {
|
||||
map->spin_lock_off = -EINVAL;
|
||||
}
|
||||
|
||||
err = security_bpf_map_alloc(map);
|
||||
@@ -654,7 +673,7 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
|
||||
}
|
||||
|
||||
/* last field in 'union bpf_attr' used by this command */
|
||||
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
|
||||
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
|
||||
|
||||
static int map_lookup_elem(union bpf_attr *attr)
|
||||
{
|
||||
@@ -670,6 +689,9 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
|
||||
return -EINVAL;
|
||||
|
||||
if (attr->flags & ~BPF_F_LOCK)
|
||||
return -EINVAL;
|
||||
|
||||
f = fdget(ufd);
|
||||
map = __bpf_map_get(f);
|
||||
if (IS_ERR(map))
|
||||
@@ -680,6 +702,12 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
if ((attr->flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)) {
|
||||
err = -EINVAL;
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
key = memdup_user(ukey, map->key_size);
|
||||
if (IS_ERR(key)) {
|
||||
err = PTR_ERR(key);
|
||||
@@ -688,7 +716,8 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
|
||||
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
|
||||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
|
||||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
|
||||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
|
||||
map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
|
||||
value_size = round_up(map->value_size, 8) * num_possible_cpus();
|
||||
else if (IS_FD_MAP(map))
|
||||
value_size = sizeof(u32);
|
||||
@@ -712,8 +741,13 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
err = bpf_percpu_hash_copy(map, key, value);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
|
||||
err = bpf_percpu_array_copy(map, key, value);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
|
||||
err = bpf_percpu_cgroup_storage_copy(map, key, value);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
|
||||
err = bpf_stackmap_copy(map, key, value);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
|
||||
err = bpf_percpu_cgroup_storage_update(map, key, value,
|
||||
attr->flags);
|
||||
} else if (IS_FD_ARRAY(map)) {
|
||||
err = bpf_fd_array_map_lookup_elem(map, key, value);
|
||||
} else if (IS_FD_HASH(map)) {
|
||||
@@ -726,8 +760,15 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
ptr = map->ops->map_lookup_elem_sys_only(map, key);
|
||||
else
|
||||
ptr = map->ops->map_lookup_elem(map, key);
|
||||
if (ptr)
|
||||
memcpy(value, ptr, value_size);
|
||||
if (ptr) {
|
||||
if (attr->flags & BPF_F_LOCK)
|
||||
/* lock 'ptr' and copy everything but lock */
|
||||
copy_map_value_locked(map, value, ptr, true);
|
||||
else
|
||||
copy_map_value(map, value, ptr);
|
||||
/* mask lock, since value wasn't zero inited */
|
||||
check_and_init_map_lock(map, value);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
err = ptr ? 0 : -ENOENT;
|
||||
}
|
||||
@@ -790,6 +831,12 @@ static int map_update_elem(union bpf_attr *attr)
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
if ((attr->flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)) {
|
||||
err = -EINVAL;
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
key = memdup_user(ukey, map->key_size);
|
||||
if (IS_ERR(key)) {
|
||||
err = PTR_ERR(key);
|
||||
@@ -798,7 +845,8 @@ static int map_update_elem(union bpf_attr *attr)
|
||||
|
||||
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
|
||||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
|
||||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
|
||||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
|
||||
map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
|
||||
value_size = round_up(map->value_size, 8) * num_possible_cpus();
|
||||
else
|
||||
value_size = map->value_size;
|
||||
@@ -1014,10 +1062,15 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	enum bpf_cgroup_storage_type stype;
	int i;

	if (aux->cgroup_storage)
		bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
	for_each_cgroup_storage_type(stype) {
		if (!aux->cgroup_storage[stype])
			continue;
		bpf_cgroup_storage_release(aux->prog,
					   aux->cgroup_storage[stype]);
	}

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);
@@ -1128,7 +1181,10 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
|
||||
if (atomic_dec_and_test(&prog->aux->refcnt)) {
|
||||
/* bpf_prog_free_id() must be called first */
|
||||
bpf_prog_free_id(prog, do_idr_lock);
|
||||
bpf_prog_kallsyms_del_all(prog);
|
||||
bpf_prog_kallsyms_del(prog);
|
||||
btf_put(prog->aux->btf);
|
||||
kvfree(prog->aux->func_info);
|
||||
bpf_prog_free_linfo(prog);
|
||||
|
||||
call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
|
||||
}
|
||||
@@ -1349,15 +1405,23 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
|
||||
switch (expected_attach_type) {
|
||||
case BPF_CGROUP_SETSOCKOPT:
|
||||
case BPF_CGROUP_GETSOCKOPT:
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* last field in 'union bpf_attr' used by this command */
|
||||
#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type
|
||||
#define BPF_PROG_LOAD_LAST_FIELD line_info_cnt
|
||||
|
||||
static int bpf_prog_load(union bpf_attr *attr)
|
||||
static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
|
||||
{
|
||||
enum bpf_prog_type type = attr->prog_type;
|
||||
struct bpf_prog *prog;
|
||||
@@ -1448,7 +1512,7 @@ static int bpf_prog_load(union bpf_attr *attr)
|
||||
goto free_prog;
|
||||
|
||||
/* run eBPF verifier */
|
||||
err = bpf_check(&prog, attr);
|
||||
err = bpf_check(&prog, attr, uattr);
|
||||
if (err < 0)
|
||||
goto free_used_maps;
|
||||
|
||||
@@ -1482,6 +1546,9 @@ static int bpf_prog_load(union bpf_attr *attr)
|
||||
return err;
|
||||
|
||||
free_used_maps:
|
||||
bpf_prog_free_linfo(prog);
|
||||
kvfree(prog->aux->func_info);
|
||||
btf_put(prog->aux->btf);
|
||||
bpf_prog_kallsyms_del_subprogs(prog);
|
||||
free_used_maps(prog->aux);
|
||||
free_prog:
|
||||
@@ -1560,13 +1627,18 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
||||
return -ENOMEM;
|
||||
raw_tp->btp = btp;
|
||||
|
||||
prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
|
||||
BPF_PROG_TYPE_RAW_TRACEPOINT);
|
||||
prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
|
||||
if (IS_ERR(prog)) {
|
||||
err = PTR_ERR(prog);
|
||||
goto out_free_tp;
|
||||
}
|
||||
|
||||
if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
|
||||
prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
|
||||
err = -EINVAL;
|
||||
goto out_put_prog;
|
||||
}
|
||||
|
||||
err = bpf_probe_register(raw_tp->btp, prog);
|
||||
if (err)
|
||||
goto out_put_prog;
|
||||
@@ -1594,6 +1666,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
|
||||
switch (prog->type) {
|
||||
case BPF_PROG_TYPE_CGROUP_SOCK:
|
||||
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
|
||||
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
|
||||
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
|
||||
default:
|
||||
return 0;
|
||||
@@ -1647,14 +1720,23 @@ static int bpf_prog_attach(const union bpf_attr *attr)
|
||||
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
|
||||
break;
|
||||
case BPF_SK_MSG_VERDICT:
|
||||
ptype = BPF_PROG_TYPE_SK_MSG;
|
||||
ret = sock_map_get_from_fd(attr, prog);
|
||||
break;
|
||||
case BPF_SK_SKB_STREAM_PARSER:
|
||||
case BPF_SK_SKB_STREAM_VERDICT:
|
||||
ptype = BPF_PROG_TYPE_SK_SKB;
|
||||
ret = sock_map_get_from_fd(attr, prog);
|
||||
break;
|
||||
case BPF_FLOW_DISSECTOR:
|
||||
ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
|
||||
break;
|
||||
case BPF_LIRC_MODE2:
|
||||
ptype = BPF_PROG_TYPE_LIRC_MODE2;
|
||||
case BPF_CGROUP_SYSCTL:
|
||||
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
|
||||
break;
|
||||
case BPF_CGROUP_GETSOCKOPT:
|
||||
case BPF_CGROUP_SETSOCKOPT:
|
||||
ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
@@ -1672,7 +1754,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
|
||||
switch (ptype) {
|
||||
case BPF_PROG_TYPE_SK_SKB:
|
||||
case BPF_PROG_TYPE_SK_MSG:
|
||||
ret = sockmap_get_from_fd(attr, ptype, prog);
|
||||
ret = sock_map_get_from_fd(attr, prog);
|
||||
break;
|
||||
case BPF_PROG_TYPE_LIRC_MODE2:
|
||||
ret = lirc_prog_attach(attr, prog);
|
||||
@@ -1725,12 +1807,21 @@ static int bpf_prog_detach(const union bpf_attr *attr)
|
||||
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
|
||||
break;
|
||||
case BPF_SK_MSG_VERDICT:
|
||||
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
|
||||
return sock_map_get_from_fd(attr, NULL);
|
||||
case BPF_SK_SKB_STREAM_PARSER:
|
||||
case BPF_SK_SKB_STREAM_VERDICT:
|
||||
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
|
||||
return sock_map_get_from_fd(attr, NULL);
|
||||
case BPF_FLOW_DISSECTOR:
|
||||
return skb_flow_dissector_bpf_prog_detach(attr);
|
||||
case BPF_LIRC_MODE2:
|
||||
return lirc_prog_detach(attr);
|
||||
case BPF_CGROUP_SYSCTL:
|
||||
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
|
||||
break;
|
||||
case BPF_CGROUP_GETSOCKOPT:
|
||||
case BPF_CGROUP_SETSOCKOPT:
|
||||
ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1766,6 +1857,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
|
||||
case BPF_CGROUP_UDP6_RECVMSG:
|
||||
case BPF_CGROUP_SOCK_OPS:
|
||||
case BPF_CGROUP_DEVICE:
|
||||
case BPF_CGROUP_SYSCTL:
|
||||
case BPF_CGROUP_GETSOCKOPT:
|
||||
case BPF_CGROUP_SETSOCKOPT:
|
||||
break;
|
||||
case BPF_LIRC_MODE2:
|
||||
return lirc_prog_query(attr, uattr);
|
||||
@@ -1899,13 +1993,26 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
|
||||
}
|
||||
|
||||
static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
|
||||
unsigned long addr)
|
||||
unsigned long addr, u32 *off,
|
||||
u32 *type)
|
||||
{
|
||||
const struct bpf_map *map;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < prog->aux->used_map_cnt; i++)
|
||||
if (prog->aux->used_maps[i] == (void *)addr)
|
||||
return prog->aux->used_maps[i];
|
||||
for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
|
||||
map = prog->aux->used_maps[i];
|
||||
if (map == (void *)addr) {
|
||||
*type = BPF_PSEUDO_MAP_FD;
|
||||
return map;
|
||||
}
|
||||
if (!map->ops->map_direct_value_meta)
|
||||
continue;
|
||||
if (!map->ops->map_direct_value_meta(map, addr, off)) {
|
||||
*type = BPF_PSEUDO_MAP_VALUE;
|
||||
return map;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1914,6 +2021,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
|
||||
{
|
||||
const struct bpf_map *map;
|
||||
struct bpf_insn *insns;
|
||||
u32 off, type;
|
||||
u64 imm;
|
||||
int i;
|
||||
|
||||
@@ -1941,11 +2049,12 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
|
||||
continue;
|
||||
|
||||
imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
|
||||
map = bpf_map_from_imm(prog, imm);
|
||||
map = bpf_map_from_imm(prog, imm, &off, &type);
|
||||
|
||||
if (map) {
|
||||
insns[i].src_reg = BPF_PSEUDO_MAP_FD;
|
||||
insns[i].src_reg = type;
|
||||
insns[i].imm = map->id;
|
||||
insns[i + 1].imm = 0;
|
||||
insns[i + 1].imm = off;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1960,6 +2069,32 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
|
||||
return insns;
|
||||
}
|
||||
|
||||
static int set_info_rec_size(struct bpf_prog_info *info)
|
||||
{
|
||||
/*
|
||||
* Ensure info.*_rec_size is the same as kernel expected size
|
||||
*
|
||||
* or
|
||||
*
|
||||
* Only allow zero *_rec_size if both _rec_size and _cnt are
|
||||
* zero. In this case, the kernel will set the expected
|
||||
* _rec_size back to the info.
|
||||
*/
|
||||
if ((info->func_info_cnt || info->func_info_rec_size) &&
|
||||
info->func_info_rec_size != sizeof(struct bpf_func_info))
|
||||
return -EINVAL;
|
||||
if ((info->line_info_cnt || info->line_info_rec_size) &&
|
||||
info->line_info_rec_size != sizeof(struct bpf_line_info))
|
||||
return -EINVAL;
|
||||
if ((info->jited_line_info_cnt || info->jited_line_info_rec_size) &&
|
||||
info->jited_line_info_rec_size != sizeof(__u64))
|
||||
return -EINVAL;
|
||||
info->func_info_rec_size = sizeof(struct bpf_func_info);
|
||||
info->line_info_rec_size = sizeof(struct bpf_line_info);
|
||||
info->jited_line_info_rec_size = sizeof(__u64);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_prog_get_info_by_fd(struct file *file,
|
||||
struct bpf_prog *prog,
|
||||
const union bpf_attr *attr,
|
||||
@@ -2004,11 +2139,18 @@ static int bpf_prog_get_info_by_fd(struct file *file,
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
err = set_info_rec_size(&info);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
info.jited_prog_len = 0;
|
||||
info.xlated_prog_len = 0;
|
||||
info.nr_jited_ksyms = 0;
|
||||
info.nr_jited_func_lens = 0;
|
||||
info.func_info_cnt = 0;
|
||||
info.line_info_cnt = 0;
|
||||
info.jited_line_info_cnt = 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
@@ -2132,6 +2274,71 @@ static int bpf_prog_get_info_by_fd(struct file *file,
|
||||
}
|
||||
}
|
||||
|
||||
if (prog->aux->btf) {
|
||||
u32 krec_size = sizeof(struct bpf_func_info);
|
||||
u32 ucnt, urec_size;
|
||||
|
||||
info.btf_id = btf_id(prog->aux->btf);
|
||||
|
||||
ucnt = info.func_info_cnt;
|
||||
info.func_info_cnt = prog->aux->func_info_cnt;
|
||||
urec_size = info.func_info_rec_size;
|
||||
info.func_info_rec_size = krec_size;
|
||||
if (ucnt) {
|
||||
/* expect passed-in urec_size is what the kernel expects */
|
||||
if (urec_size != info.func_info_rec_size)
|
||||
return -EINVAL;
|
||||
|
||||
if (bpf_dump_raw_ok(file->f_cred)) {
|
||||
char __user *user_finfo;
|
||||
user_finfo = u64_to_user_ptr(info.func_info);
|
||||
ucnt = min_t(u32, info.func_info_cnt, ucnt);
|
||||
if (copy_to_user(user_finfo, prog->aux->func_info,
|
||||
krec_size * ucnt))
|
||||
return -EFAULT;
|
||||
} else {
|
||||
info.func_info_cnt = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
info.func_info_cnt = 0;
|
||||
}
|
||||
ulen = info.line_info_cnt;
|
||||
info.line_info_cnt = prog->aux->nr_linfo;
|
||||
if (info.line_info_cnt && ulen) {
|
||||
if (bpf_dump_raw_ok(file->f_cred)) {
|
||||
__u8 __user *user_linfo;
|
||||
user_linfo = u64_to_user_ptr(info.line_info);
|
||||
ulen = min_t(u32, info.line_info_cnt, ulen);
|
||||
if (copy_to_user(user_linfo, prog->aux->linfo,
|
||||
info.line_info_rec_size * ulen))
|
||||
return -EFAULT;
|
||||
} else {
|
||||
info.line_info = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ulen = info.jited_line_info_cnt;
|
||||
if (prog->aux->jited_linfo)
|
||||
info.jited_line_info_cnt = prog->aux->nr_linfo;
|
||||
else
|
||||
info.jited_line_info_cnt = 0;
|
||||
if (info.jited_line_info_cnt && ulen) {
|
||||
if (bpf_dump_raw_ok(file->f_cred)) {
|
||||
__u64 __user *user_linfo;
|
||||
u32 i;
|
||||
user_linfo = u64_to_user_ptr(info.jited_line_info);
|
||||
ulen = min_t(u32, info.jited_line_info_cnt, ulen);
|
||||
for (i = 0; i < ulen; i++) {
|
||||
if (put_user((__u64)(long)prog->aux->jited_linfo[i],
|
||||
&user_linfo[i]))
|
||||
return -EFAULT;
|
||||
}
|
||||
} else {
|
||||
info.jited_line_info = 0;
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
if (copy_to_user(uinfo, &info, info_len) ||
|
||||
put_user(info_len, &uattr->info.info_len))
|
||||
@@ -2423,7 +2630,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		err = bpf_prog_load(&attr, uattr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);

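A minimal userspace sketch, not part of this series, of how the func_info/line_info plumbing added to bpf_prog_get_info_by_fd() above is consumed. Only the bpf_prog_info field names come from the patch; the helper names and the two-pass pattern (first call learns the counts, second call supplies buffers) are mine, and the dump is only populated for CAP_SYS_ADMIN callers per the capable() check above.

/* Sketch: read back BTF line info via BPF_OBJ_GET_INFO_BY_FD.
 * Assumes uapi headers that already carry the line_info fields.
 */
#include <linux/bpf.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static struct bpf_line_info *get_line_info(int prog_fd, __u32 *cnt)
{
	struct bpf_prog_info info = {};
	union bpf_attr attr = {};
	struct bpf_line_info *linfo;

	attr.info.bpf_fd = prog_fd;
	attr.info.info = (__u64)(unsigned long)&info;
	attr.info.info_len = sizeof(info);

	/* Pass 1: all counts zero, the kernel fills in nr_linfo. */
	if (sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)) ||
	    !info.line_info_cnt)
		return NULL;

	*cnt = info.line_info_cnt;
	linfo = calloc(*cnt, sizeof(*linfo));
	if (!linfo)
		return NULL;

	/* Pass 2: rec_size must match sizeof(struct bpf_line_info) or
	 * set_info_rec_size() above returns -EINVAL.
	 */
	memset(&info, 0, sizeof(info));
	info.line_info_cnt = *cnt;
	info.line_info_rec_size = sizeof(struct bpf_line_info);
	info.line_info = (__u64)(unsigned long)linfo;
	if (sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr))) {
		free(linfo);
		return NULL;
	}
	return linfo;
}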
File diff suppressed because it is too large
@@ -7411,7 +7411,7 @@ static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
{
	struct path ns_path;
	struct inode *ns_inode;
	void *error;
	int error;

	error = ns_get_path(&ns_path, task, ns_ops);
	if (!error) {

@@ -1246,6 +1246,13 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)

	down_read(&uts_sem);
	memcpy(&tmp, utsname(), sizeof(tmp));
	if (!strncmp(current->comm, "bpfloader", 9) ||
	    !strncmp(current->comm, "netbpfload", 10) ||
	    !strncmp(current->comm, "netd", 4)) {
		strcpy(tmp.release, "5.4.186");
		pr_debug("fake uname: %s/%d release=%s\n",
			 current->comm, current->pid, tmp.release);
	}
	up_read(&uts_sem);
	if (copy_to_user(name, &tmp, sizeof(tmp)))
		return -EFAULT;

@@ -975,6 +975,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
};

static bool raw_tp_writable_prog_is_valid_access(int off, int size,
						 enum bpf_access_type type,
						 const struct bpf_prog *prog,
						 struct bpf_insn_access_aux *info)
{
	if (off == 0) {
		if (size != sizeof(u64) || type != BPF_READ)
			return false;
		info->reg_type = PTR_TO_TP_BUFFER;
	}
	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
}

const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_writable_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
};

static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
@@ -1255,8 +1276,10 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
		return -EINVAL;

	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
						   prog);
	if (prog->aux->max_tp_access > btp->writable_size)
		return -EINVAL;

	return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
}

int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)

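For reference, a small userspace sketch (not part of this diff) of the caller side of bpf_raw_tracepoint_open(). With the change above the command takes any program fd and then checks for BPF_PROG_TYPE_RAW_TRACEPOINT or BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE itself; the function name and tracepoint name below are only examples.

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Returns an fd that keeps the tracepoint attachment alive until closed. */
static int raw_tp_open(const char *name, int prog_fd)
{
	union bpf_attr attr = {};

	attr.raw_tracepoint.name = (__u64)(unsigned long)name;
	attr.raw_tracepoint.prog_fd = prog_fd;
	return syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}

/* Example use: int link_fd = raw_tp_open("sched_switch", prog_fd); */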
14 net/Kconfig
@@ -51,6 +51,9 @@ config NET_INGRESS
|
||||
config NET_EGRESS
|
||||
bool
|
||||
|
||||
config SKB_EXTENSIONS
|
||||
bool
|
||||
|
||||
menu "Networking options"
|
||||
|
||||
source "net/packet/Kconfig"
|
||||
@@ -297,8 +300,11 @@ config BPF_JIT
|
||||
|
||||
config BPF_STREAM_PARSER
|
||||
bool "enable BPF STREAM_PARSER"
|
||||
depends on INET
|
||||
depends on BPF_SYSCALL
|
||||
depends on CGROUP_BPF
|
||||
select STREAM_PARSER
|
||||
select NET_SOCK_MSG
|
||||
---help---
|
||||
Enabling this allows a stream parser to be used with
|
||||
BPF_MAP_TYPE_SOCKMAP.
|
||||
@@ -419,6 +425,14 @@ config GRO_CELLS
|
||||
config SOCK_VALIDATE_XMIT
|
||||
bool
|
||||
|
||||
config NET_SOCK_MSG
|
||||
bool
|
||||
default n
|
||||
help
|
||||
The NET_SOCK_MSG provides a framework for plain sockets (e.g. TCP) or
|
||||
ULPs (upper layer modules, e.g. TLS) to process L7 application data
|
||||
with the help of BPF programs.
|
||||
|
||||
config NET_DEVLINK
|
||||
tristate "Network physical/parent device Netlink interface"
|
||||
help
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <net/bpf_sk_storage.h>
|
||||
|
||||
static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
|
||||
struct bpf_cgroup_storage *storage)
|
||||
@@ -18,7 +19,6 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
|
||||
|
||||
preempt_disable();
|
||||
rcu_read_lock();
|
||||
bpf_cgroup_storage_set(storage);
|
||||
ret = BPF_PROG_RUN(prog, ctx);
|
||||
rcu_read_unlock();
|
||||
preempt_enable();
|
||||
@@ -32,10 +32,6 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
|
||||
u64 time_start, time_spent = 0;
|
||||
u32 ret = 0, i;
|
||||
|
||||
storage = bpf_cgroup_storage_alloc(prog);
|
||||
if (IS_ERR(storage))
|
||||
return PTR_ERR(storage);
|
||||
|
||||
if (!repeat)
|
||||
repeat = 1;
|
||||
time_start = ktime_get_ns();
|
||||
|
||||
@@ -16,6 +16,7 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
|
||||
obj-y += net-sysfs.o
|
||||
obj-$(CONFIG_PAGE_POOL) += page_pool.o
|
||||
obj-$(CONFIG_PROC_FS) += net-procfs.o
|
||||
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
|
||||
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
|
||||
obj-$(CONFIG_NETPOLL) += netpoll.o
|
||||
obj-$(CONFIG_FIB_RULES) += fib_rules.o
|
||||
@@ -28,8 +29,10 @@ obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
|
||||
obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
|
||||
obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
|
||||
obj-$(CONFIG_SOCKEV_NLMCAST) += sockev_nlmcast.o
|
||||
obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
|
||||
obj-$(CONFIG_DST_CACHE) += dst_cache.o
|
||||
obj-$(CONFIG_HWBM) += hwbm.o
|
||||
obj-$(CONFIG_NET_DEVLINK) += devlink.o
|
||||
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
|
||||
obj-$(CONFIG_FAILOVER) += failover.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
|
||||
|
||||
812 net/core/bpf_sk_storage.c Normal file
@@ -0,0 +1,812 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/btf.h>

#include <linux/vmalloc.h>
#include <linux/mm.h>
static inline void *__compat_kvcalloc(size_t n, size_t size, gfp_t flags)
{
	return kvmalloc_array(n, size, flags | __GFP_ZERO);
}
#define kvcalloc __compat_kvcalloc

static atomic_t cache_idx;
|
||||
|
||||
struct bucket {
|
||||
struct hlist_head list;
|
||||
raw_spinlock_t lock;
|
||||
};
|
||||
|
||||
/* The map is not the primary owner of a bpf_sk_storage_elem.
|
||||
* Instead, the sk->sk_bpf_storage is.
|
||||
*
|
||||
* The map (bpf_sk_storage_map) is for two purposes
|
||||
* 1. Define the size of the "sk local storage". It is
|
||||
* the map's value_size.
|
||||
*
|
||||
* 2. Maintain a list to keep track of all elems such
|
||||
* that they can be cleaned up during the map destruction.
|
||||
*
|
||||
* When a bpf local storage is being looked up for a
|
||||
* particular sk, the "bpf_map" pointer is actually used
|
||||
* as the "key" to search in the list of elem in
|
||||
* sk->sk_bpf_storage.
|
||||
*
|
||||
* Hence, consider sk->sk_bpf_storage is the mini-map
|
||||
* with the "bpf_map" pointer as the searching key.
|
||||
*/
|
||||
struct bpf_sk_storage_map {
|
||||
struct bpf_map map;
|
||||
/* Lookup elem does not require accessing the map.
|
||||
*
|
||||
* Updating/Deleting requires a bucket lock to
|
||||
* link/unlink the elem from the map. Having
|
||||
* multiple buckets to improve contention.
|
||||
*/
|
||||
struct bucket *buckets;
|
||||
u32 bucket_log;
|
||||
u16 elem_size;
|
||||
u16 cache_idx;
|
||||
};
|
||||
|
||||
struct bpf_sk_storage_data {
|
||||
/* smap is used as the searching key when looking up
|
||||
* from sk->sk_bpf_storage.
|
||||
*
|
||||
* Put it in the same cacheline as the data to minimize
|
||||
* the number of cachelines access during the cache hit case.
|
||||
*/
|
||||
struct bpf_sk_storage_map __rcu *smap;
|
||||
u8 data[0] __aligned(8);
|
||||
};
|
||||
|
||||
/* Linked to bpf_sk_storage and bpf_sk_storage_map */
|
||||
struct bpf_sk_storage_elem {
|
||||
struct hlist_node map_node; /* Linked to bpf_sk_storage_map */
|
||||
struct hlist_node snode; /* Linked to bpf_sk_storage */
|
||||
struct bpf_sk_storage __rcu *sk_storage;
|
||||
struct rcu_head rcu;
|
||||
/* 8 bytes hole */
|
||||
/* The data is stored in another cacheline to minimize
|
||||
* the number of cachelines access during a cache hit.
|
||||
*/
|
||||
struct bpf_sk_storage_data sdata ____cacheline_aligned;
|
||||
};
|
||||
|
||||
#define SELEM(_SDATA) container_of((_SDATA), struct bpf_sk_storage_elem, sdata)
|
||||
#define SDATA(_SELEM) (&(_SELEM)->sdata)
|
||||
#define BPF_SK_STORAGE_CACHE_SIZE 16
|
||||
|
||||
struct bpf_sk_storage {
|
||||
struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
|
||||
struct hlist_head list; /* List of bpf_sk_storage_elem */
|
||||
struct sock *sk; /* The sk that owns the above "list" of
|
||||
* bpf_sk_storage_elem.
|
||||
*/
|
||||
struct rcu_head rcu;
|
||||
raw_spinlock_t lock; /* Protect adding/removing from the "list" */
|
||||
};
|
||||
|
||||
static struct bucket *select_bucket(struct bpf_sk_storage_map *smap,
|
||||
struct bpf_sk_storage_elem *selem)
|
||||
{
|
||||
return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
|
||||
}
|
||||
|
||||
static int omem_charge(struct sock *sk, unsigned int size)
|
||||
{
|
||||
/* same check as in sock_kmalloc() */
|
||||
if (size <= sysctl_optmem_max &&
|
||||
atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
|
||||
atomic_add(size, &sk->sk_omem_alloc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static bool selem_linked_to_sk(const struct bpf_sk_storage_elem *selem)
|
||||
{
|
||||
return !hlist_unhashed(&selem->snode);
|
||||
}
|
||||
|
||||
static bool selem_linked_to_map(const struct bpf_sk_storage_elem *selem)
|
||||
{
|
||||
return !hlist_unhashed(&selem->map_node);
|
||||
}
|
||||
|
||||
static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap,
|
||||
struct sock *sk, void *value,
|
||||
bool charge_omem)
|
||||
{
|
||||
struct bpf_sk_storage_elem *selem;
|
||||
|
||||
if (charge_omem && omem_charge(sk, smap->elem_size))
|
||||
return NULL;
|
||||
|
||||
selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (selem) {
|
||||
if (value)
|
||||
memcpy(SDATA(selem)->data, value, smap->map.value_size);
|
||||
return selem;
|
||||
}
|
||||
|
||||
if (charge_omem)
|
||||
atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* sk_storage->lock must be held and selem->sk_storage == sk_storage.
|
||||
* The caller must ensure selem->smap is still valid to be
|
||||
* dereferenced for its smap->elem_size and smap->cache_idx.
|
||||
*/
|
||||
static bool __selem_unlink_sk(struct bpf_sk_storage *sk_storage,
|
||||
struct bpf_sk_storage_elem *selem,
|
||||
bool uncharge_omem)
|
||||
{
|
||||
struct bpf_sk_storage_map *smap;
|
||||
bool free_sk_storage;
|
||||
struct sock *sk;
|
||||
|
||||
smap = rcu_dereference(SDATA(selem)->smap);
|
||||
sk = sk_storage->sk;
|
||||
|
||||
/* All uncharging on sk->sk_omem_alloc must be done first.
|
||||
* sk may be freed once the last selem is unlinked from sk_storage.
|
||||
*/
|
||||
if (uncharge_omem)
|
||||
atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
|
||||
|
||||
free_sk_storage = hlist_is_singular_node(&selem->snode,
|
||||
&sk_storage->list);
|
||||
if (free_sk_storage) {
|
||||
atomic_sub(sizeof(struct bpf_sk_storage), &sk->sk_omem_alloc);
|
||||
sk_storage->sk = NULL;
|
||||
/* After this RCU_INIT, sk may be freed and cannot be used */
|
||||
RCU_INIT_POINTER(sk->sk_bpf_storage, NULL);
|
||||
|
||||
/* sk_storage is not freed now. sk_storage->lock is
|
||||
* still held and raw_spin_unlock_bh(&sk_storage->lock)
|
||||
* will be done by the caller.
|
||||
*
|
||||
* Although the unlock will be done under
|
||||
* rcu_read_lock(), it is more intuitive to
|
||||
* read if kfree_rcu(sk_storage, rcu) is done
|
||||
* after the raw_spin_unlock_bh(&sk_storage->lock).
|
||||
*
|
||||
* Hence, a "bool free_sk_storage" is returned
|
||||
* to the caller which then calls the kfree_rcu()
|
||||
* after unlock.
|
||||
*/
|
||||
}
|
||||
hlist_del_init_rcu(&selem->snode);
|
||||
if (rcu_access_pointer(sk_storage->cache[smap->cache_idx]) ==
|
||||
SDATA(selem))
|
||||
RCU_INIT_POINTER(sk_storage->cache[smap->cache_idx], NULL);
|
||||
|
||||
kfree_rcu(selem, rcu);
|
||||
|
||||
return free_sk_storage;
|
||||
}
|
||||
|
||||
static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
|
||||
{
|
||||
struct bpf_sk_storage *sk_storage;
|
||||
bool free_sk_storage = false;
|
||||
|
||||
if (unlikely(!selem_linked_to_sk(selem)))
|
||||
/* selem has already been unlinked from sk */
|
||||
return;
|
||||
|
||||
sk_storage = rcu_dereference(selem->sk_storage);
|
||||
raw_spin_lock_bh(&sk_storage->lock);
|
||||
if (likely(selem_linked_to_sk(selem)))
|
||||
free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
|
||||
raw_spin_unlock_bh(&sk_storage->lock);
|
||||
|
||||
if (free_sk_storage)
|
||||
kfree_rcu(sk_storage, rcu);
|
||||
}
|
||||
|
||||
/* sk_storage->lock must be held and sk_storage->list cannot be empty */
|
||||
static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
|
||||
struct bpf_sk_storage_elem *selem)
|
||||
{
|
||||
RCU_INIT_POINTER(selem->sk_storage, sk_storage);
|
||||
hlist_add_head(&selem->snode, &sk_storage->list);
|
||||
}
|
||||
|
||||
static void selem_unlink_map(struct bpf_sk_storage_elem *selem)
|
||||
{
|
||||
struct bpf_sk_storage_map *smap;
|
||||
struct bucket *b;
|
||||
|
||||
if (unlikely(!selem_linked_to_map(selem)))
|
||||
/* selem has already be unlinked from smap */
|
||||
return;
|
||||
|
||||
smap = rcu_dereference(SDATA(selem)->smap);
|
||||
b = select_bucket(smap, selem);
|
||||
raw_spin_lock_bh(&b->lock);
|
||||
if (likely(selem_linked_to_map(selem)))
|
||||
hlist_del_init_rcu(&selem->map_node);
|
||||
raw_spin_unlock_bh(&b->lock);
|
||||
}
|
||||
|
||||
static void selem_link_map(struct bpf_sk_storage_map *smap,
|
||||
struct bpf_sk_storage_elem *selem)
|
||||
{
|
||||
struct bucket *b = select_bucket(smap, selem);
|
||||
|
||||
raw_spin_lock_bh(&b->lock);
|
||||
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
|
||||
hlist_add_head_rcu(&selem->map_node, &b->list);
|
||||
raw_spin_unlock_bh(&b->lock);
|
||||
}
|
||||
|
||||
static void selem_unlink(struct bpf_sk_storage_elem *selem)
|
||||
{
|
||||
/* Always unlink from map before unlinking from sk_storage
|
||||
* because selem will be freed after successfully unlinked from
|
||||
* the sk_storage.
|
||||
*/
|
||||
selem_unlink_map(selem);
|
||||
selem_unlink_sk(selem);
|
||||
}
|
||||
|
||||
static struct bpf_sk_storage_data *
|
||||
__sk_storage_lookup(struct bpf_sk_storage *sk_storage,
|
||||
struct bpf_sk_storage_map *smap,
|
||||
bool cacheit_lockit)
|
||||
{
|
||||
struct bpf_sk_storage_data *sdata;
|
||||
struct bpf_sk_storage_elem *selem;
|
||||
|
||||
/* Fast path (cache hit) */
|
||||
sdata = rcu_dereference(sk_storage->cache[smap->cache_idx]);
|
||||
if (sdata && rcu_access_pointer(sdata->smap) == smap)
|
||||
return sdata;
|
||||
|
||||
/* Slow path (cache miss) */
|
||||
hlist_for_each_entry_rcu(selem, &sk_storage->list, snode)
|
||||
if (rcu_access_pointer(SDATA(selem)->smap) == smap)
|
||||
break;
|
||||
|
||||
if (!selem)
|
||||
return NULL;
|
||||
|
||||
sdata = SDATA(selem);
|
||||
if (cacheit_lockit) {
|
||||
/* spinlock is needed to avoid racing with the
|
||||
* parallel delete. Otherwise, publishing an already
|
||||
* deleted sdata to the cache will become a use-after-free
|
||||
* problem in the next __sk_storage_lookup().
|
||||
*/
|
||||
raw_spin_lock_bh(&sk_storage->lock);
|
||||
if (selem_linked_to_sk(selem))
|
||||
rcu_assign_pointer(sk_storage->cache[smap->cache_idx],
|
||||
sdata);
|
||||
raw_spin_unlock_bh(&sk_storage->lock);
|
||||
}
|
||||
|
||||
return sdata;
|
||||
}
|
||||
|
||||
static struct bpf_sk_storage_data *
|
||||
sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
|
||||
{
|
||||
struct bpf_sk_storage *sk_storage;
|
||||
struct bpf_sk_storage_map *smap;
|
||||
|
||||
sk_storage = rcu_dereference(sk->sk_bpf_storage);
|
||||
if (!sk_storage)
|
||||
return NULL;
|
||||
|
||||
smap = (struct bpf_sk_storage_map *)map;
|
||||
return __sk_storage_lookup(sk_storage, smap, cacheit_lockit);
|
||||
}
|
||||
|
||||
static int check_flags(const struct bpf_sk_storage_data *old_sdata,
|
||||
u64 map_flags)
|
||||
{
|
||||
if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
|
||||
/* elem already exists */
|
||||
return -EEXIST;
|
||||
|
||||
if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
|
||||
/* elem doesn't exist, cannot update it */
|
||||
return -ENOENT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sk_storage_alloc(struct sock *sk,
|
||||
struct bpf_sk_storage_map *smap,
|
||||
struct bpf_sk_storage_elem *first_selem)
|
||||
{
|
||||
struct bpf_sk_storage *prev_sk_storage, *sk_storage;
|
||||
int err;
|
||||
|
||||
err = omem_charge(sk, sizeof(*sk_storage));
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (!sk_storage) {
|
||||
err = -ENOMEM;
|
||||
goto uncharge;
|
||||
}
|
||||
INIT_HLIST_HEAD(&sk_storage->list);
|
||||
raw_spin_lock_init(&sk_storage->lock);
|
||||
sk_storage->sk = sk;
|
||||
|
||||
__selem_link_sk(sk_storage, first_selem);
|
||||
selem_link_map(smap, first_selem);
|
||||
/* Publish sk_storage to sk. sk->sk_lock cannot be acquired.
|
||||
* Hence, atomic ops is used to set sk->sk_bpf_storage
|
||||
* from NULL to the newly allocated sk_storage ptr.
|
||||
*
|
||||
* From now on, the sk->sk_bpf_storage pointer is protected
|
||||
* by the sk_storage->lock. Hence, when freeing
|
||||
* the sk->sk_bpf_storage, the sk_storage->lock must
|
||||
* be held before setting sk->sk_bpf_storage to NULL.
|
||||
*/
|
||||
prev_sk_storage = cmpxchg((struct bpf_sk_storage **)&sk->sk_bpf_storage,
|
||||
NULL, sk_storage);
|
||||
if (unlikely(prev_sk_storage)) {
|
||||
selem_unlink_map(first_selem);
|
||||
err = -EAGAIN;
|
||||
goto uncharge;
|
||||
|
||||
/* Note that even first_selem was linked to smap's
|
||||
* bucket->list, first_selem can be freed immediately
|
||||
* (instead of kfree_rcu) because
|
||||
* bpf_sk_storage_map_free() does a
|
||||
* synchronize_rcu() before walking the bucket->list.
|
||||
* Hence, no one is accessing selem from the
|
||||
* bucket->list under rcu_read_lock().
|
||||
*/
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
uncharge:
|
||||
kfree(sk_storage);
|
||||
atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* sk cannot be going away because it is linking new elem
|
||||
* to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0).
|
||||
* Otherwise, it will become a leak (and other memory issues
|
||||
* during map destruction).
|
||||
*/
|
||||
static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk,
|
||||
struct bpf_map *map,
|
||||
void *value,
|
||||
u64 map_flags)
|
||||
{
|
||||
struct bpf_sk_storage_data *old_sdata = NULL;
|
||||
struct bpf_sk_storage_elem *selem;
|
||||
struct bpf_sk_storage *sk_storage;
|
||||
struct bpf_sk_storage_map *smap;
|
||||
int err;
|
||||
|
||||
/* BPF_EXIST and BPF_NOEXIST cannot be both set */
|
||||
if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
|
||||
/* BPF_F_LOCK can only be used in a value with spin_lock */
|
||||
unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
smap = (struct bpf_sk_storage_map *)map;
|
||||
sk_storage = rcu_dereference(sk->sk_bpf_storage);
|
||||
if (!sk_storage || hlist_empty(&sk_storage->list)) {
|
||||
/* Very first elem for this sk */
|
||||
err = check_flags(NULL, map_flags);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
selem = selem_alloc(smap, sk, value, true);
|
||||
if (!selem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
err = sk_storage_alloc(sk, smap, selem);
|
||||
if (err) {
|
||||
kfree(selem);
|
||||
atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
return SDATA(selem);
|
||||
}
|
||||
|
||||
if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
|
||||
/* Hoping to find an old_sdata to do inline update
|
||||
* such that it can avoid taking the sk_storage->lock
|
||||
* and changing the lists.
|
||||
*/
|
||||
old_sdata = __sk_storage_lookup(sk_storage, smap, false);
|
||||
err = check_flags(old_sdata, map_flags);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
if (old_sdata && selem_linked_to_sk(SELEM(old_sdata))) {
|
||||
copy_map_value_locked(map, old_sdata->data,
|
||||
value, false);
|
||||
return old_sdata;
|
||||
}
|
||||
}
|
||||
|
||||
raw_spin_lock_bh(&sk_storage->lock);
|
||||
|
||||
/* Recheck sk_storage->list under sk_storage->lock */
|
||||
if (unlikely(hlist_empty(&sk_storage->list))) {
|
||||
/* A parallel del is happening and sk_storage is going
|
||||
* away. It has just been checked before, so very
|
||||
* unlikely. Return instead of retry to keep things
|
||||
* simple.
|
||||
*/
|
||||
err = -EAGAIN;
|
||||
goto unlock_err;
|
||||
}
|
||||
|
||||
old_sdata = __sk_storage_lookup(sk_storage, smap, false);
|
||||
err = check_flags(old_sdata, map_flags);
|
||||
if (err)
|
||||
goto unlock_err;
|
||||
|
||||
if (old_sdata && (map_flags & BPF_F_LOCK)) {
|
||||
copy_map_value_locked(map, old_sdata->data, value, false);
|
||||
selem = SELEM(old_sdata);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* sk_storage->lock is held. Hence, we are sure
|
||||
* we can unlink and uncharge the old_sdata successfully
|
||||
* later. Hence, instead of charging the new selem now
|
||||
* and then uncharge the old selem later (which may cause
|
||||
* a potential but unnecessary charge failure), avoid taking
|
||||
* a charge at all here (the "!old_sdata" check) and the
|
||||
* old_sdata will not be uncharged later during __selem_unlink_sk().
|
||||
*/
|
||||
selem = selem_alloc(smap, sk, value, !old_sdata);
|
||||
if (!selem) {
|
||||
err = -ENOMEM;
|
||||
goto unlock_err;
|
||||
}
|
||||
|
||||
/* First, link the new selem to the map */
|
||||
selem_link_map(smap, selem);
|
||||
|
||||
/* Second, link (and publish) the new selem to sk_storage */
|
||||
__selem_link_sk(sk_storage, selem);
|
||||
|
||||
/* Third, remove old selem, SELEM(old_sdata) */
|
||||
if (old_sdata) {
|
||||
selem_unlink_map(SELEM(old_sdata));
|
||||
__selem_unlink_sk(sk_storage, SELEM(old_sdata), false);
|
||||
}
|
||||
|
||||
unlock:
|
||||
raw_spin_unlock_bh(&sk_storage->lock);
|
||||
return SDATA(selem);
|
||||
|
||||
unlock_err:
|
||||
raw_spin_unlock_bh(&sk_storage->lock);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
|
||||
{
|
||||
struct bpf_sk_storage_data *sdata;
|
||||
|
||||
sdata = sk_storage_lookup(sk, map, false);
|
||||
if (!sdata)
|
||||
return -ENOENT;
|
||||
|
||||
selem_unlink(SELEM(sdata));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Called by __sk_destruct() */
|
||||
void bpf_sk_storage_free(struct sock *sk)
|
||||
{
|
||||
struct bpf_sk_storage_elem *selem;
|
||||
struct bpf_sk_storage *sk_storage;
|
||||
bool free_sk_storage = false;
|
||||
struct hlist_node *n;
|
||||
|
||||
rcu_read_lock();
|
||||
sk_storage = rcu_dereference(sk->sk_bpf_storage);
|
||||
if (!sk_storage) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
/* Neither the bpf_prog nor the bpf-map's syscall
|
||||
* could be modifying the sk_storage->list now.
|
||||
* Thus, no elem can be added-to or deleted-from the
|
||||
* sk_storage->list by the bpf_prog or by the bpf-map's syscall.
|
||||
*
|
||||
* It is racing with bpf_sk_storage_map_free() alone
|
||||
* when unlinking elem from the sk_storage->list and
|
||||
* the map's bucket->list.
|
||||
*/
|
||||
raw_spin_lock_bh(&sk_storage->lock);
|
||||
hlist_for_each_entry_safe(selem, n, &sk_storage->list, snode) {
|
||||
/* Always unlink from map before unlinking from
|
||||
* sk_storage.
|
||||
*/
|
||||
selem_unlink_map(selem);
|
||||
free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
|
||||
}
|
||||
raw_spin_unlock_bh(&sk_storage->lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (free_sk_storage)
|
||||
kfree_rcu(sk_storage, rcu);
|
||||
}
|
||||
|
||||
static void bpf_sk_storage_map_free(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_sk_storage_elem *selem;
|
||||
struct bpf_sk_storage_map *smap;
|
||||
struct bucket *b;
|
||||
unsigned int i;
|
||||
|
||||
smap = (struct bpf_sk_storage_map *)map;
|
||||
|
||||
synchronize_rcu();
|
||||
|
||||
/* bpf prog and the userspace can no longer access this map
|
||||
* now. No new selem (of this map) can be added
|
||||
* to the sk->sk_bpf_storage or to the map bucket's list.
|
||||
*
|
||||
* The elem of this map can be cleaned up here
|
||||
* or
|
||||
* by bpf_sk_storage_free() during __sk_destruct().
|
||||
*/
|
||||
for (i = 0; i < (1U << smap->bucket_log); i++) {
|
||||
b = &smap->buckets[i];
|
||||
|
||||
rcu_read_lock();
|
||||
/* No one is adding to b->list now */
|
||||
while ((selem = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&b->list)),
|
||||
struct bpf_sk_storage_elem,
|
||||
map_node))) {
|
||||
selem_unlink(selem);
|
||||
cond_resched_rcu();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/* bpf_sk_storage_free() may still need to access the map.
|
||||
* e.g. bpf_sk_storage_free() has unlinked selem from the map
|
||||
* which then made the above while((selem = ...)) loop
|
||||
* exited immediately.
|
||||
*
|
||||
* However, the bpf_sk_storage_free() still needs to access
|
||||
* the smap->elem_size to do the uncharging in
|
||||
* __selem_unlink_sk().
|
||||
*
|
||||
* Hence, wait another rcu grace period for the
|
||||
* bpf_sk_storage_free() to finish.
|
||||
*/
|
||||
synchronize_rcu();
|
||||
|
||||
kvfree(smap->buckets);
|
||||
kfree(map);
|
||||
}
|
||||
|
||||
static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
|
||||
{
|
||||
if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
|
||||
attr->key_size != sizeof(int) || !attr->value_size ||
|
||||
/* Enforce BTF for userspace sk dumping */
|
||||
!attr->btf_key_type_id || !attr->btf_value_type_id)
|
||||
return -EINVAL;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (attr->value_size >= KMALLOC_MAX_SIZE -
|
||||
MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem) ||
|
||||
/* U16_MAX is much more than enough for sk local storage
|
||||
* considering a tcp_sock is ~2k.
|
||||
*/
|
||||
attr->value_size > U16_MAX - sizeof(struct bpf_sk_storage_elem))
|
||||
return -E2BIG;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_sk_storage_map *smap;
|
||||
unsigned int i;
|
||||
u32 nbuckets;
|
||||
u64 cost;
|
||||
|
||||
smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
|
||||
if (!smap)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
bpf_map_init_from_attr(&smap->map, attr);
|
||||
|
||||
smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus()));
|
||||
nbuckets = 1U << smap->bucket_log;
|
||||
smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
|
||||
GFP_USER | __GFP_NOWARN);
|
||||
if (!smap->buckets) {
|
||||
kfree(smap);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
|
||||
|
||||
for (i = 0; i < nbuckets; i++) {
|
||||
INIT_HLIST_HEAD(&smap->buckets[i].list);
|
||||
raw_spin_lock_init(&smap->buckets[i].lock);
|
||||
}
|
||||
|
||||
smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
|
||||
smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
|
||||
BPF_SK_STORAGE_CACHE_SIZE;
|
||||
smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
|
||||
return &smap->map;
|
||||
}
|
||||
|
||||
static int notsupp_get_next_key(struct bpf_map *map, void *key,
|
||||
void *next_key)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
static int bpf_sk_storage_map_check_btf(const struct bpf_map *map,
|
||||
const struct btf *btf,
|
||||
const struct btf_type *key_type,
|
||||
const struct btf_type *value_type)
|
||||
{
|
||||
u32 int_data;
|
||||
|
||||
if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
|
||||
return -EINVAL;
|
||||
|
||||
int_data = *(u32 *)(key_type + 1);
|
||||
if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct bpf_sk_storage_data *sdata;
|
||||
struct socket *sock;
|
||||
int fd, err;
|
||||
|
||||
fd = *(int *)key;
|
||||
sock = sockfd_lookup(fd, &err);
|
||||
if (sock) {
|
||||
sdata = sk_storage_lookup(sock->sk, map, true);
|
||||
sockfd_put(sock);
|
||||
return sdata ? sdata->data : NULL;
|
||||
}
|
||||
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
|
||||
void *value, u64 map_flags)
|
||||
{
|
||||
struct bpf_sk_storage_data *sdata;
|
||||
struct socket *sock;
|
||||
int fd, err;
|
||||
|
||||
fd = *(int *)key;
|
||||
sock = sockfd_lookup(fd, &err);
|
||||
if (sock) {
|
||||
sdata = sk_storage_update(sock->sk, map, value, map_flags);
|
||||
sockfd_put(sock);
|
||||
return IS_ERR(sdata) ? PTR_ERR(sdata) : 0;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct socket *sock;
|
||||
int fd, err;
|
||||
|
||||
fd = *(int *)key;
|
||||
sock = sockfd_lookup(fd, &err);
|
||||
if (sock) {
|
||||
err = sk_storage_delete(sock->sk, map);
|
||||
sockfd_put(sock);
|
||||
return err;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
|
||||
void *, value, u64, flags)
|
||||
{
|
||||
struct bpf_sk_storage_data *sdata;
|
||||
|
||||
if (flags > BPF_SK_STORAGE_GET_F_CREATE)
|
||||
return (unsigned long)NULL;
|
||||
|
||||
sdata = sk_storage_lookup(sk, map, true);
|
||||
if (sdata)
|
||||
return (unsigned long)sdata->data;
|
||||
|
||||
if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
|
||||
/* Cannot add new elem to a going away sk.
|
||||
* Otherwise, the new elem may become a leak
|
||||
* (and also other memory issues during map
|
||||
* destruction).
|
||||
*/
|
||||
refcount_inc_not_zero(&sk->sk_refcnt)) {
|
||||
sdata = sk_storage_update(sk, map, value, BPF_NOEXIST);
|
||||
/* sk must be a fullsock (guaranteed by verifier),
|
||||
* so sock_gen_put() is unnecessary.
|
||||
*/
|
||||
sock_put(sk);
|
||||
return IS_ERR(sdata) ?
|
||||
(unsigned long)NULL : (unsigned long)sdata->data;
|
||||
}
|
||||
|
||||
return (unsigned long)NULL;
|
||||
}
|
||||
|
||||
BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
|
||||
{
|
||||
if (refcount_inc_not_zero(&sk->sk_refcnt)) {
|
||||
int err;
|
||||
|
||||
err = sk_storage_delete(sk, map);
|
||||
sock_put(sk);
|
||||
return err;
|
||||
}
|
||||
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
const struct bpf_map_ops sk_storage_map_ops = {
|
||||
.map_alloc_check = bpf_sk_storage_map_alloc_check,
|
||||
.map_alloc = bpf_sk_storage_map_alloc,
|
||||
.map_free = bpf_sk_storage_map_free,
|
||||
.map_get_next_key = notsupp_get_next_key,
|
||||
.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
|
||||
.map_update_elem = bpf_fd_sk_storage_update_elem,
|
||||
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
|
||||
.map_check_btf = bpf_sk_storage_map_check_btf,
|
||||
};
|
||||
|
||||
const struct bpf_func_proto bpf_sk_storage_get_proto = {
	.func		= bpf_sk_storage_get,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_SOCKET,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_delete_proto = {
	.func		= bpf_sk_storage_delete,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_SOCKET,
};
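A hedged BPF-program-side sketch of the new BPF_MAP_TYPE_SK_STORAGE map, not part of this diff. The BPF_F_NO_PREALLOC flag and the BTF requirement follow from bpf_sk_storage_map_alloc_check() above, but which program types actually get bpf_sk_storage_get()/bpf_sk_storage_delete() is decided in the suppressed net/core/filter.c hunk, so the cgroup_skb section, the skb->sk access and the bpf_sk_fullsock() call below are assumptions; the map and struct names are illustrative.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct pkt_count {
	__u64 packets;
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);	/* mandatory, see alloc_check above */
	__type(key, int);			/* key must be a 32-bit int */
	__type(value, struct pkt_count);
} sk_pkt_cnt SEC(".maps");

SEC("cgroup_skb/egress")
int count_egress(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	struct pkt_count *cnt;

	if (!sk)
		return 1;
	sk = bpf_sk_fullsock(sk);
	if (!sk)
		return 1;

	/* Creates the per-socket slot on first use. */
	cnt = bpf_sk_storage_get(&sk_pkt_cnt, sk, 0,
				 BPF_SK_STORAGE_GET_F_CREATE);
	if (cnt)
		__sync_fetch_and_add(&cnt->packets, 1);
	return 1;
}

char _license[] SEC("license") = "GPL";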
File diff suppressed because it is too large
@@ -25,6 +25,8 @@
|
||||
#include <net/flow_dissector.h>
|
||||
#include <scsi/fc/fc_fcoe.h>
|
||||
#include <uapi/linux/batadv_packet.h>
|
||||
#include <linux/bpf.h>
|
||||
static DEFINE_MUTEX(flow_dissector_mutex);
|
||||
|
||||
static void dissector_set_key(struct flow_dissector *flow_dissector,
|
||||
enum flow_dissector_key_id key_id)
|
||||
@@ -62,6 +64,42 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
|
||||
}
|
||||
EXPORT_SYMBOL(skb_flow_dissector_init);
|
||||
|
||||
int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
				       struct bpf_prog *prog)
{
	struct bpf_prog *attached;
	struct net *net;
	net = current->nsproxy->net_ns;
	mutex_lock(&flow_dissector_mutex);
	attached = rcu_dereference_protected(net->flow_dissector_prog,
					     lockdep_is_held(&flow_dissector_mutex));
	if (attached) {
		/* Only one BPF program can be attached at a time */
		mutex_unlock(&flow_dissector_mutex);
		return -EEXIST;
	}
	rcu_assign_pointer(net->flow_dissector_prog, prog);
	mutex_unlock(&flow_dissector_mutex);
	return 0;
}
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
{
	struct bpf_prog *attached;
	struct net *net;
	net = current->nsproxy->net_ns;
	mutex_lock(&flow_dissector_mutex);
	attached = rcu_dereference_protected(net->flow_dissector_prog,
					     lockdep_is_held(&flow_dissector_mutex));
	if (!attached) {
		mutex_unlock(&flow_dissector_mutex);
		return -ENOENT;
	}
	bpf_prog_put(attached);
	RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
	mutex_unlock(&flow_dissector_mutex);
	return 0;
}

/**
|
||||
* skb_flow_get_be16 - extract be16 entity
|
||||
* @skb: sk_buff to extract from
|
||||
@@ -588,6 +626,56 @@ static bool skb_flow_dissect_allowed(int *num_hdrs)
|
||||
return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
|
||||
}
|
||||
|
||||
static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
|
||||
struct flow_dissector *flow_dissector,
|
||||
void *target_container)
|
||||
{
|
||||
struct flow_dissector_key_control *key_control;
|
||||
struct flow_dissector_key_basic *key_basic;
|
||||
struct flow_dissector_key_addrs *key_addrs;
|
||||
struct flow_dissector_key_ports *key_ports;
|
||||
key_control = skb_flow_dissector_target(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_CONTROL,
|
||||
target_container);
|
||||
key_control->thoff = flow_keys->thoff;
|
||||
if (flow_keys->is_frag)
|
||||
key_control->flags |= FLOW_DIS_IS_FRAGMENT;
|
||||
if (flow_keys->is_first_frag)
|
||||
key_control->flags |= FLOW_DIS_FIRST_FRAG;
|
||||
if (flow_keys->is_encap)
|
||||
key_control->flags |= FLOW_DIS_ENCAPSULATION;
|
||||
key_basic = skb_flow_dissector_target(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_BASIC,
|
||||
target_container);
|
||||
key_basic->n_proto = flow_keys->n_proto;
|
||||
key_basic->ip_proto = flow_keys->ip_proto;
|
||||
if (flow_keys->addr_proto == ETH_P_IP &&
|
||||
dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
|
||||
key_addrs = skb_flow_dissector_target(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_IPV4_ADDRS,
|
||||
target_container);
|
||||
key_addrs->v4addrs.src = flow_keys->ipv4_src;
|
||||
key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
|
||||
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
|
||||
} else if (flow_keys->addr_proto == ETH_P_IPV6 &&
|
||||
dissector_uses_key(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
|
||||
key_addrs = skb_flow_dissector_target(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_IPV6_ADDRS,
|
||||
target_container);
|
||||
memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
|
||||
sizeof(key_addrs->v6addrs));
|
||||
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
|
||||
}
|
||||
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
|
||||
key_ports = skb_flow_dissector_target(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_PORTS,
|
||||
target_container);
|
||||
key_ports->src = flow_keys->sport;
|
||||
key_ports->dst = flow_keys->dport;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* __skb_flow_dissect - extract the flow_keys struct and return it
|
||||
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
|
||||
@@ -619,6 +707,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
|
||||
struct flow_dissector_key_vlan *key_vlan;
|
||||
enum flow_dissect_ret fdret;
|
||||
enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
|
||||
struct bpf_prog *attached;
|
||||
int num_hdrs = 0;
|
||||
u8 ip_proto = 0;
|
||||
bool ret;
|
||||
@@ -658,6 +747,37 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
|
||||
key_basic = skb_flow_dissector_target(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_BASIC,
|
||||
target_container);
|
||||
rcu_read_lock();
|
||||
attached = skb ? rcu_dereference(dev_net(skb->dev)->flow_dissector_prog)
|
||||
: NULL;
|
||||
if (attached) {
|
||||
/* Note that even though the const qualifier is discarded
|
||||
* throughout the execution of the BPF program, all changes(the
|
||||
* control block) are reverted after the BPF program returns.
|
||||
* Therefore, __skb_flow_dissect does not alter the skb.
|
||||
*/
|
||||
struct bpf_flow_keys flow_keys = {};
|
||||
struct bpf_skb_data_end cb_saved;
|
||||
struct bpf_skb_data_end *cb;
|
||||
u32 result;
|
||||
cb = (struct bpf_skb_data_end *)skb->cb;
|
||||
/* Save Control Block */
|
||||
memcpy(&cb_saved, cb, sizeof(cb_saved));
|
||||
memset(cb, 0, sizeof(cb_saved));
|
||||
/* Pass parameters to the BPF program */
|
||||
cb->qdisc_cb.flow_keys = &flow_keys;
|
||||
flow_keys.nhoff = nhoff;
|
||||
bpf_compute_data_pointers((struct sk_buff *)skb);
|
||||
result = BPF_PROG_RUN(attached, skb);
|
||||
/* Restore state */
|
||||
memcpy(cb, &cb_saved, sizeof(cb_saved));
|
||||
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
|
||||
target_container);
|
||||
key_control->thoff = min_t(u16, key_control->thoff, skb->len);
|
||||
rcu_read_unlock();
|
||||
return result == BPF_OK;
|
||||
}
|
||||
	rcu_read_unlock();

	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {

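A minimal userspace sketch (not part of this diff) of attaching and detaching a BPF_PROG_TYPE_FLOW_DISSECTOR program through the regular BPF_PROG_ATTACH/BPF_PROG_DETACH commands, which land in skb_flow_dissector_bpf_prog_attach()/_detach() above. The function names are mine; only one program per network namespace is accepted (-EEXIST otherwise).

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

static int flow_dissector_attach(int prog_fd)
{
	union bpf_attr attr = {};

	attr.attach_type = BPF_FLOW_DISSECTOR;
	attr.attach_bpf_fd = prog_fd;
	/* target_fd is ignored here: the program is attached to
	 * current->nsproxy->net_ns, see the function above. */
	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}

static int flow_dissector_detach(void)
{
	union bpf_attr attr = {};

	attr.attach_type = BPF_FLOW_DISSECTOR;
	return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));
}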
@@ -79,6 +79,9 @@
|
||||
|
||||
struct kmem_cache *skbuff_head_cache __ro_after_init;
|
||||
static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
|
||||
#ifdef CONFIG_SKB_EXTENSIONS
|
||||
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
|
||||
#endif
|
||||
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
|
||||
EXPORT_SYMBOL(sysctl_max_skb_frags);
|
||||
|
||||
@@ -633,6 +636,7 @@ void skb_release_head_state(struct sk_buff *skb)
|
||||
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
|
||||
nf_bridge_put(skb->nf_bridge);
|
||||
#endif
|
||||
skb_ext_put(skb);
|
||||
}
|
||||
|
||||
/* Free everything but the sk_buff shell. */
|
||||
@@ -812,6 +816,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
|
||||
new->dev = old->dev;
|
||||
memcpy(new->cb, old->cb, sizeof(old->cb));
|
||||
skb_dst_copy(new, old);
|
||||
__skb_ext_copy(new, old);
|
||||
#ifdef CONFIG_XFRM
|
||||
new->sp = secpath_get(old->sp);
|
||||
#endif
|
||||
@@ -4000,6 +4005,38 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(skb_gro_receive);
|
||||
|
||||
#ifdef CONFIG_SKB_EXTENSIONS
|
||||
#define SKB_EXT_ALIGN_VALUE 8
|
||||
#define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)
|
||||
static const u8 skb_ext_type_len[] = {
|
||||
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
|
||||
[SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
|
||||
#endif
|
||||
};
|
||||
|
||||
static __always_inline unsigned int skb_ext_total_length(void)
|
||||
{
|
||||
return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
|
||||
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
|
||||
skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
|
||||
#endif
|
||||
0;
|
||||
}
|
||||
|
||||
static void skb_extensions_init(void)
|
||||
{
|
||||
BUILD_BUG_ON(SKB_EXT_NUM >= 8);
|
||||
BUILD_BUG_ON(skb_ext_total_length() > 255);
|
||||
skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
|
||||
SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
|
||||
0,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
|
||||
NULL);
|
||||
}
|
||||
#else
|
||||
static void skb_extensions_init(void) {}
|
||||
#endif
|
||||
|
||||
void __init skb_init(void)
|
||||
{
|
||||
skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
|
||||
@@ -4014,6 +4051,7 @@ void __init skb_init(void)
|
||||
0,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
|
||||
NULL);
|
||||
skb_extensions_init();
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -5663,3 +5701,113 @@ void skb_condense(struct sk_buff *skb)
|
||||
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(skb_condense);
|
||||
|
||||
#ifdef CONFIG_SKB_EXTENSIONS
|
||||
static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
|
||||
{
|
||||
return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
|
||||
}
|
||||
|
||||
static struct skb_ext *skb_ext_alloc(void)
|
||||
{
|
||||
struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
|
||||
|
||||
if (new) {
|
||||
memset(new->offset, 0, sizeof(new->offset));
|
||||
refcount_set(&new->refcnt, 1);
|
||||
}
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old)
|
||||
{
|
||||
struct skb_ext *new;
|
||||
|
||||
if (refcount_read(&old->refcnt) == 1)
|
||||
return old;
|
||||
|
||||
new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
|
||||
if (!new)
|
||||
return NULL;
|
||||
|
||||
memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
|
||||
refcount_set(&new->refcnt, 1);
|
||||
__skb_ext_put(old);
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
/**
|
||||
* skb_ext_add - allocate space for given extension, COW if needed
|
||||
* @skb: buffer
|
||||
* @id: extension to allocate space for
|
||||
*
|
||||
* Allocates enough space for the given extension.
|
||||
* If the extension is already present, a pointer to that extension
|
||||
* is returned.
|
||||
*
|
||||
* If the skb was cloned, COW applies and the returned memory can be
|
||||
* modified without changing the extension space of clones buffers.
|
||||
*
|
||||
* Returns pointer to the extension or NULL on allocation failure.
|
||||
*/
|
||||
void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
|
||||
{
|
||||
struct skb_ext *new, *old = NULL;
|
||||
unsigned int newlen, newoff;
|
||||
|
||||
if (skb->active_extensions) {
|
||||
old = skb->extensions;
|
||||
new = skb_ext_maybe_cow(old);
|
||||
if (!new)
|
||||
return NULL;
|
||||
if (__skb_ext_exist(old, id)) {
|
||||
if (old != new)
|
||||
skb->extensions = new;
|
||||
goto set_active;
|
||||
}
|
||||
newoff = old->chunks;
|
||||
} else {
|
||||
newoff = SKB_EXT_CHUNKSIZEOF(*new);
|
||||
new = skb_ext_alloc();
|
||||
if (!new)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
newlen = newoff + skb_ext_type_len[id];
|
||||
new->chunks = newlen;
|
||||
new->offset[id] = newoff;
|
||||
skb->extensions = new;
|
||||
|
||||
set_active:
|
||||
skb->active_extensions |= 1 << id;
|
||||
return skb_ext_get_ptr(new, id);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(skb_ext_add);
|
||||
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
|
||||
{
|
||||
struct skb_ext *ext = skb->extensions;
|
||||
skb->active_extensions &= ~(1 << id);
|
||||
if (skb->active_extensions == 0) {
|
||||
skb->extensions = NULL;
|
||||
__skb_ext_put(ext);
|
||||
}
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(__skb_ext_del);
void __skb_ext_put(struct skb_ext *ext)
{
	/* If this is last clone, nothing can increment
	 * it after check passes. Avoids one atomic op.
	 */
	if (refcount_read(&ext->refcnt) == 1)
		goto free_now;
	if (!refcount_dec_and_test(&ext->refcnt))
		return;
free_now:
	kmem_cache_free(skbuff_ext_cache, ext);
}
EXPORT_SYMBOL(__skb_ext_put);
#endif /* CONFIG_SKB_EXTENSIONS */

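A short in-kernel sketch of the skb extension API introduced above, assuming the matching declarations land in include/linux/skbuff.h as part of this series. SKB_EXT_BRIDGE_NF is the only id wired into skb_ext_type_len here, and this tree still keeps bridge state in skb->nf_bridge, so the function below is purely illustrative of the add/del mechanics.

#include <linux/errno.h>
#include <linux/skbuff.h>

static int example_tag_bridge_info(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	struct nf_bridge_info *ext;

	/* COWs the extension block if the skb is shared, then returns a
	 * pointer to per-skb space sized for SKB_EXT_BRIDGE_NF. */
	ext = skb_ext_add(skb, SKB_EXT_BRIDGE_NF);
	if (!ext)
		return -ENOMEM;

	/* ... fill in state here ... */

	/* Drop the extension again once it is no longer needed. */
	__skb_ext_del(skb, SKB_EXT_BRIDGE_NF);
#endif
	return 0;
}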
763 net/core/skmsg.c Normal file
@@ -0,0 +1,763 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
|
||||
|
||||
#include <linux/skmsg.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <linux/scatterlist.h>
|
||||
|
||||
#include <net/sock.h>
|
||||
#include <net/tcp.h>
|
||||
|
||||
static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
|
||||
{
|
||||
if (msg->sg.end > msg->sg.start &&
|
||||
elem_first_coalesce < msg->sg.end)
|
||||
return true;
|
||||
|
||||
if (msg->sg.end < msg->sg.start &&
|
||||
(elem_first_coalesce > msg->sg.start ||
|
||||
elem_first_coalesce < msg->sg.end))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
|
||||
int elem_first_coalesce)
|
||||
{
|
||||
struct page_frag *pfrag = sk_page_frag(sk);
|
||||
int ret = 0;
|
||||
|
||||
len -= msg->sg.size;
|
||||
while (len > 0) {
|
||||
struct scatterlist *sge;
|
||||
u32 orig_offset;
|
||||
int use, i;
|
||||
|
||||
if (!sk_page_frag_refill(sk, pfrag))
|
||||
return -ENOMEM;
|
||||
|
||||
orig_offset = pfrag->offset;
|
||||
use = min_t(int, len, pfrag->size - orig_offset);
|
||||
if (!sk_wmem_schedule(sk, use))
|
||||
return -ENOMEM;
|
||||
|
||||
i = msg->sg.end;
|
||||
sk_msg_iter_var_prev(i);
|
||||
sge = &msg->sg.data[i];
|
||||
|
||||
if (sk_msg_try_coalesce_ok(msg, elem_first_coalesce) &&
|
||||
sg_page(sge) == pfrag->page &&
|
||||
sge->offset + sge->length == orig_offset) {
|
||||
sge->length += use;
|
||||
} else {
|
||||
if (sk_msg_full(msg)) {
|
||||
ret = -ENOSPC;
|
||||
break;
|
||||
}
|
||||
|
||||
sge = &msg->sg.data[msg->sg.end];
|
||||
sg_unmark_end(sge);
|
||||
sg_set_page(sge, pfrag->page, use, orig_offset);
|
||||
get_page(pfrag->page);
|
||||
sk_msg_iter_next(msg, end);
|
||||
}
|
||||
|
||||
sk_mem_charge(sk, use);
|
||||
msg->sg.size += use;
|
||||
pfrag->offset += use;
|
||||
len -= use;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_msg_alloc);
|
||||
|
||||
void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes)
|
||||
{
|
||||
int i = msg->sg.start;
|
||||
|
||||
do {
|
||||
struct scatterlist *sge = sk_msg_elem(msg, i);
|
||||
|
||||
if (bytes < sge->length) {
|
||||
sge->length -= bytes;
|
||||
sge->offset += bytes;
|
||||
sk_mem_uncharge(sk, bytes);
|
||||
break;
|
||||
}
|
||||
|
||||
sk_mem_uncharge(sk, sge->length);
|
||||
bytes -= sge->length;
|
||||
sge->length = 0;
|
||||
sge->offset = 0;
|
||||
sk_msg_iter_var_next(i);
|
||||
} while (bytes && i != msg->sg.end);
|
||||
msg->sg.start = i;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_msg_return_zero);
|
||||
|
||||
void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes)
|
||||
{
|
||||
int i = msg->sg.start;
|
||||
|
||||
do {
|
||||
struct scatterlist *sge = &msg->sg.data[i];
|
||||
int uncharge = (bytes < sge->length) ? bytes : sge->length;
|
||||
|
||||
sk_mem_uncharge(sk, uncharge);
|
||||
bytes -= uncharge;
|
||||
sk_msg_iter_var_next(i);
|
||||
} while (i != msg->sg.end);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_msg_return);
|
||||
|
||||
static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
|
||||
bool charge)
|
||||
{
|
||||
struct scatterlist *sge = sk_msg_elem(msg, i);
|
||||
u32 len = sge->length;
|
||||
|
||||
if (charge)
|
||||
sk_mem_uncharge(sk, len);
|
||||
if (!msg->skb)
|
||||
put_page(sg_page(sge));
|
||||
memset(sge, 0, sizeof(*sge));
|
||||
return len;
|
||||
}
|
||||
|
||||
static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i,
|
||||
bool charge)
|
||||
{
|
||||
struct scatterlist *sge = sk_msg_elem(msg, i);
|
||||
int freed = 0;
|
||||
|
||||
while (msg->sg.size) {
|
||||
msg->sg.size -= sge->length;
|
||||
freed += sk_msg_free_elem(sk, msg, i, charge);
|
||||
sk_msg_iter_var_next(i);
|
||||
sk_msg_check_to_free(msg, i, msg->sg.size);
|
||||
sge = sk_msg_elem(msg, i);
|
||||
}
|
||||
if (msg->skb)
|
||||
consume_skb(msg->skb);
|
||||
sk_msg_init(msg);
|
||||
return freed;
|
||||
}
|
||||
|
||||
int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg)
|
||||
{
|
||||
return __sk_msg_free(sk, msg, msg->sg.start, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_msg_free_nocharge);
|
||||
|
||||
int sk_msg_free(struct sock *sk, struct sk_msg *msg)
|
||||
{
|
||||
return __sk_msg_free(sk, msg, msg->sg.start, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_msg_free);
|
||||
|
||||
static void __sk_msg_free_partial(struct sock *sk, struct sk_msg *msg,
|
||||
u32 bytes, bool charge)
|
||||
{
|
||||
struct scatterlist *sge;
|
||||
u32 i = msg->sg.start;
|
||||
|
||||
while (bytes) {
|
||||
sge = sk_msg_elem(msg, i);
|
||||
if (!sge->length)
|
||||
break;
|
||||
if (bytes < sge->length) {
|
||||
if (charge)
|
||||
sk_mem_uncharge(sk, bytes);
|
||||
sge->length -= bytes;
|
||||
sge->offset += bytes;
|
||||
msg->sg.size -= bytes;
|
||||
break;
|
||||
}
|
||||
|
||||
msg->sg.size -= sge->length;
|
||||
bytes -= sge->length;
|
||||
sk_msg_free_elem(sk, msg, i, charge);
|
||||
sk_msg_iter_var_next(i);
|
||||
sk_msg_check_to_free(msg, i, bytes);
|
||||
}
|
||||
msg->sg.start = i;
|
||||
}
|
||||
|
||||
void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes)
|
||||
{
|
||||
__sk_msg_free_partial(sk, msg, bytes, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_msg_free_partial);
|
||||
|
||||
void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
|
||||
u32 bytes)
|
||||
{
|
||||
__sk_msg_free_partial(sk, msg, bytes, false);
|
||||
}
|
||||
|
||||
void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len)
|
||||
{
|
||||
int trim = msg->sg.size - len;
|
||||
u32 i = msg->sg.end;
|
||||
|
||||
if (trim <= 0) {
|
||||
WARN_ON(trim < 0);
|
||||
return;
|
||||
}
|
||||
|
||||
sk_msg_iter_var_prev(i);
|
||||
msg->sg.size = len;
|
||||
while (msg->sg.data[i].length &&
|
||||
trim >= msg->sg.data[i].length) {
|
||||
trim -= msg->sg.data[i].length;
|
||||
sk_msg_free_elem(sk, msg, i, true);
|
||||
sk_msg_iter_var_prev(i);
|
||||
if (!trim)
|
||||
goto out;
|
||||
}
|
||||
|
||||
msg->sg.data[i].length -= trim;
|
||||
sk_mem_uncharge(sk, trim);
|
||||
out:
|
||||
/* If we trim data before the curr pointer, update copybreak and curr
|
||||
* so that any future copy operations start at new copy location.
|
||||
* However, trimmed data that has not yet been used in a copy op
|
||||
* does not require an update.
|
||||
*/
|
||||
if (msg->sg.curr >= i) {
|
||||
msg->sg.curr = i;
|
||||
msg->sg.copybreak = msg->sg.data[i].length;
|
||||
}
|
||||
sk_msg_iter_var_next(i);
|
||||
msg->sg.end = i;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_msg_trim);
|
||||
|
||||
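The start/end/curr/copybreak fields manipulated above all index one fixed-size scatterlist ring. As a point of orientation, here is a minimal, illustrative walk over the populated elements (it assumes at least one element is queued, and uses only helpers that appear in this file):

/* Illustrative walk over a populated sk_msg ring. sg.start is the first
 * live element, sg.end is one past the last; sk_msg_iter_var_next() wraps
 * the index at MAX_MSG_FRAGS, which is why this is a do/while loop rather
 * than a plain for().
 */
static void example_msg_walk(struct sk_msg *msg)
{
        u32 i = msg->sg.start;

        do {
                struct scatterlist *sge = sk_msg_elem(msg, i);

                pr_debug("elem %u: len=%u off=%u\n", i, sge->length, sge->offset);
                sk_msg_iter_var_next(i);
        } while (i != msg->sg.end);
}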
int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
|
||||
struct sk_msg *msg, u32 bytes)
|
||||
{
|
||||
int i, maxpages, ret = 0, num_elems = sk_msg_elem_used(msg);
|
||||
const int to_max_pages = MAX_MSG_FRAGS;
|
||||
struct page *pages[MAX_MSG_FRAGS];
|
||||
ssize_t orig, copied, use, offset;
|
||||
|
||||
orig = msg->sg.size;
|
||||
while (bytes > 0) {
|
||||
i = 0;
|
||||
maxpages = to_max_pages - num_elems;
|
||||
if (maxpages == 0) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
copied = iov_iter_get_pages(from, pages, bytes, maxpages,
|
||||
&offset);
|
||||
if (copied <= 0) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
iov_iter_advance(from, copied);
|
||||
bytes -= copied;
|
||||
msg->sg.size += copied;
|
||||
|
||||
while (copied) {
|
||||
use = min_t(int, copied, PAGE_SIZE - offset);
|
||||
sg_set_page(&msg->sg.data[msg->sg.end],
|
||||
pages[i], use, offset);
|
||||
sg_unmark_end(&msg->sg.data[msg->sg.end]);
|
||||
sk_mem_charge(sk, use);
|
||||
|
||||
offset = 0;
|
||||
copied -= use;
|
||||
sk_msg_iter_next(msg, end);
|
||||
num_elems++;
|
||||
i++;
|
||||
}
|
||||
/* When zerocopy is mixed with sk_msg_*copy* operations we
|
||||
* may have a copybreak set; in this case clear it and prefer
|
||||
* zerocopy remainder when possible.
|
||||
*/
|
||||
msg->sg.copybreak = 0;
|
||||
msg->sg.curr = msg->sg.end;
|
||||
}
|
||||
out:
|
||||
/* Revert iov_iter updates; msg will need to use 'trim' later if it
|
||||
* also needs to be cleared.
|
||||
*/
|
||||
if (ret)
|
||||
iov_iter_revert(from, msg->sg.size - orig);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_msg_zerocopy_from_iter);
|
||||
|
||||
int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
|
||||
struct sk_msg *msg, u32 bytes)
|
||||
{
|
||||
int ret = -ENOSPC, i = msg->sg.curr;
|
||||
struct scatterlist *sge;
|
||||
u32 copy, buf_size;
|
||||
void *to;
|
||||
|
||||
do {
|
||||
sge = sk_msg_elem(msg, i);
|
||||
/* This is possible if a trim operation shrunk the buffer */
|
||||
if (msg->sg.copybreak >= sge->length) {
|
||||
msg->sg.copybreak = 0;
|
||||
sk_msg_iter_var_next(i);
|
||||
if (i == msg->sg.end)
|
||||
break;
|
||||
sge = sk_msg_elem(msg, i);
|
||||
}
|
||||
|
||||
buf_size = sge->length - msg->sg.copybreak;
|
||||
copy = (buf_size > bytes) ? bytes : buf_size;
|
||||
to = sg_virt(sge) + msg->sg.copybreak;
|
||||
msg->sg.copybreak += copy;
|
||||
if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
|
||||
ret = copy_from_iter_nocache(to, copy, from);
|
||||
else
|
||||
ret = copy_from_iter(to, copy, from);
|
||||
if (ret != copy) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
bytes -= copy;
|
||||
if (!bytes)
|
||||
break;
|
||||
msg->sg.copybreak = 0;
|
||||
sk_msg_iter_var_next(i);
|
||||
} while (i != msg->sg.end);
|
||||
out:
|
||||
msg->sg.curr = i;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
|
||||
|
||||
static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
|
||||
{
|
||||
struct sock *sk = psock->sk;
|
||||
int copied = 0, num_sge;
|
||||
struct sk_msg *msg;
|
||||
|
||||
msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
|
||||
if (unlikely(!msg))
|
||||
return -EAGAIN;
|
||||
if (!sk_rmem_schedule(sk, skb, skb->len)) {
|
||||
kfree(msg);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
sk_msg_init(msg);
|
||||
num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
|
||||
if (unlikely(num_sge < 0)) {
|
||||
kfree(msg);
|
||||
return num_sge;
|
||||
}
|
||||
|
||||
sk_mem_charge(sk, skb->len);
|
||||
copied = skb->len;
|
||||
msg->sg.start = 0;
|
||||
msg->sg.end = num_sge == MAX_MSG_FRAGS ? 0 : num_sge;
|
||||
msg->skb = skb;
|
||||
|
||||
sk_psock_queue_msg(psock, msg);
|
||||
sk->sk_data_ready(sk);
|
||||
return copied;
|
||||
}
|
||||
|
||||
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
|
||||
u32 off, u32 len, bool ingress)
|
||||
{
|
||||
if (ingress)
|
||||
return sk_psock_skb_ingress(psock, skb);
|
||||
else
|
||||
return skb_send_sock_locked(psock->sk, skb, off, len);
|
||||
}
|
||||
|
||||
static void sk_psock_backlog(struct work_struct *work)
|
||||
{
|
||||
struct sk_psock *psock = container_of(work, struct sk_psock, work);
|
||||
struct sk_psock_work_state *state = &psock->work_state;
|
||||
struct sk_buff *skb;
|
||||
bool ingress;
|
||||
u32 len, off;
|
||||
int ret;
|
||||
|
||||
/* Lock sock to avoid losing sk_socket during loop. */
|
||||
lock_sock(psock->sk);
|
||||
if (state->skb) {
|
||||
skb = state->skb;
|
||||
len = state->len;
|
||||
off = state->off;
|
||||
state->skb = NULL;
|
||||
goto start;
|
||||
}
|
||||
|
||||
while ((skb = skb_dequeue(&psock->ingress_skb))) {
|
||||
len = skb->len;
|
||||
off = 0;
|
||||
start:
|
||||
ingress = tcp_skb_bpf_ingress(skb);
|
||||
do {
|
||||
ret = -EIO;
|
||||
if (likely(psock->sk->sk_socket))
|
||||
ret = sk_psock_handle_skb(psock, skb, off,
|
||||
len, ingress);
|
||||
if (ret <= 0) {
|
||||
if (ret == -EAGAIN) {
|
||||
state->skb = skb;
|
||||
state->len = len;
|
||||
state->off = off;
|
||||
goto end;
|
||||
}
|
||||
/* Hard errors break pipe and stop xmit. */
|
||||
sk_psock_report_error(psock, ret ? -ret : EPIPE);
|
||||
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
|
||||
kfree_skb(skb);
|
||||
goto end;
|
||||
}
|
||||
off += ret;
|
||||
len -= ret;
|
||||
} while (len);
|
||||
|
||||
if (!ingress)
|
||||
kfree_skb(skb);
|
||||
}
|
||||
end:
|
||||
release_sock(psock->sk);
|
||||
}
|
||||
|
||||
struct sk_psock *sk_psock_init(struct sock *sk, int node)
|
||||
{
|
||||
struct sk_psock *psock = kzalloc_node(sizeof(*psock),
|
||||
GFP_ATOMIC | __GFP_NOWARN,
|
||||
node);
|
||||
if (!psock)
|
||||
return NULL;
|
||||
|
||||
psock->sk = sk;
|
||||
psock->eval = __SK_NONE;
|
||||
|
||||
INIT_LIST_HEAD(&psock->link);
|
||||
spin_lock_init(&psock->link_lock);
|
||||
|
||||
INIT_WORK(&psock->work, sk_psock_backlog);
|
||||
INIT_LIST_HEAD(&psock->ingress_msg);
|
||||
skb_queue_head_init(&psock->ingress_skb);
|
||||
|
||||
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
|
||||
refcount_set(&psock->refcnt, 1);
|
||||
|
||||
rcu_assign_sk_user_data(sk, psock);
|
||||
sock_hold(sk);
|
||||
|
||||
return psock;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_psock_init);
|
||||
|
||||
struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_link *link;
|
||||
|
||||
spin_lock_bh(&psock->link_lock);
|
||||
link = list_first_entry_or_null(&psock->link, struct sk_psock_link,
|
||||
list);
|
||||
if (link)
|
||||
list_del(&link->list);
|
||||
spin_unlock_bh(&psock->link_lock);
|
||||
return link;
|
||||
}
|
||||
|
||||
void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
|
||||
{
|
||||
struct sk_msg *msg, *tmp;
|
||||
|
||||
list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
|
||||
list_del(&msg->list);
|
||||
sk_msg_free(psock->sk, msg);
|
||||
kfree(msg);
|
||||
}
|
||||
}
|
||||
|
||||
static void sk_psock_zap_ingress(struct sk_psock *psock)
|
||||
{
|
||||
__skb_queue_purge(&psock->ingress_skb);
|
||||
__sk_psock_purge_ingress_msg(psock);
|
||||
}
|
||||
|
||||
static void sk_psock_link_destroy(struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_link *link, *tmp;
|
||||
|
||||
list_for_each_entry_safe(link, tmp, &psock->link, list) {
|
||||
list_del(&link->list);
|
||||
sk_psock_free_link(link);
|
||||
}
|
||||
}
|
||||
|
||||
static void sk_psock_destroy_deferred(struct work_struct *gc)
|
||||
{
|
||||
struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
|
||||
|
||||
/* No sk_callback_lock since already detached. */
|
||||
if (psock->parser.enabled)
|
||||
strp_done(&psock->parser.strp);
|
||||
|
||||
cancel_work_sync(&psock->work);
|
||||
|
||||
psock_progs_drop(&psock->progs);
|
||||
|
||||
sk_psock_link_destroy(psock);
|
||||
sk_psock_cork_free(psock);
|
||||
sk_psock_zap_ingress(psock);
|
||||
|
||||
if (psock->sk_redir)
|
||||
sock_put(psock->sk_redir);
|
||||
sock_put(psock->sk);
|
||||
kfree(psock);
|
||||
}
|
||||
|
||||
void sk_psock_destroy(struct rcu_head *rcu)
|
||||
{
|
||||
struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu);
|
||||
|
||||
INIT_WORK(&psock->gc, sk_psock_destroy_deferred);
|
||||
schedule_work(&psock->gc);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_psock_destroy);
|
||||
|
||||
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
rcu_assign_sk_user_data(sk, NULL);
|
||||
sk_psock_cork_free(psock);
|
||||
sk_psock_restore_proto(sk, psock);
|
||||
|
||||
write_lock_bh(&sk->sk_callback_lock);
|
||||
if (psock->progs.skb_parser)
|
||||
sk_psock_stop_strp(sk, psock);
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
|
||||
|
||||
call_rcu_sched(&psock->rcu, sk_psock_destroy);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_psock_drop);
|
||||
|
||||
static int sk_psock_map_verd(int verdict, bool redir)
|
||||
{
|
||||
switch (verdict) {
|
||||
case SK_PASS:
|
||||
return redir ? __SK_REDIRECT : __SK_PASS;
|
||||
case SK_DROP:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return __SK_DROP;
|
||||
}
|
||||
|
||||
int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
|
||||
struct sk_msg *msg)
|
||||
{
|
||||
struct bpf_prog *prog;
|
||||
int ret;
|
||||
|
||||
preempt_disable();
|
||||
rcu_read_lock();
|
||||
prog = READ_ONCE(psock->progs.msg_parser);
|
||||
if (unlikely(!prog)) {
|
||||
ret = __SK_PASS;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sk_msg_compute_data_pointers(msg);
|
||||
msg->sk = sk;
|
||||
ret = BPF_PROG_RUN(prog, msg);
|
||||
ret = sk_psock_map_verd(ret, msg->sk_redir);
|
||||
psock->apply_bytes = msg->apply_bytes;
|
||||
if (ret == __SK_REDIRECT) {
|
||||
if (psock->sk_redir)
|
||||
sock_put(psock->sk_redir);
|
||||
psock->sk_redir = msg->sk_redir;
|
||||
if (!psock->sk_redir) {
|
||||
ret = __SK_DROP;
|
||||
goto out;
|
||||
}
|
||||
sock_hold(psock->sk_redir);
|
||||
}
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
preempt_enable();
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
|
||||
|
||||
static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
int ret;
|
||||
|
||||
skb->sk = psock->sk;
|
||||
bpf_compute_data_end_sk_skb(skb);
|
||||
preempt_disable();
|
||||
ret = BPF_PROG_RUN(prog, skb);
|
||||
preempt_enable();
|
||||
/* strparser clones the skb before handing it to an upper layer,
|
||||
* meaning skb_orphan has been called. We NULL sk on the way out
|
||||
* to ensure we don't trigger a BUG_ON() in skb/sk operations
|
||||
* later and because we are not charging the memory of this skb
|
||||
* to any socket yet.
|
||||
*/
|
||||
skb->sk = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
|
||||
{
|
||||
struct sk_psock_parser *parser;
|
||||
|
||||
parser = container_of(strp, struct sk_psock_parser, strp);
|
||||
return container_of(parser, struct sk_psock, parser);
|
||||
}
|
||||
|
||||
static void sk_psock_verdict_apply(struct sk_psock *psock,
|
||||
struct sk_buff *skb, int verdict)
|
||||
{
|
||||
struct sk_psock *psock_other;
|
||||
struct sock *sk_other;
|
||||
bool ingress;
|
||||
|
||||
switch (verdict) {
|
||||
case __SK_REDIRECT:
|
||||
sk_other = tcp_skb_bpf_redirect_fetch(skb);
|
||||
if (unlikely(!sk_other))
|
||||
goto out_free;
|
||||
psock_other = sk_psock(sk_other);
|
||||
if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
|
||||
!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
|
||||
goto out_free;
|
||||
ingress = tcp_skb_bpf_ingress(skb);
|
||||
if ((!ingress && sock_writeable(sk_other)) ||
|
||||
(ingress &&
|
||||
atomic_read(&sk_other->sk_rmem_alloc) <=
|
||||
sk_other->sk_rcvbuf)) {
|
||||
if (!ingress)
|
||||
skb_set_owner_w(skb, sk_other);
|
||||
skb_queue_tail(&psock_other->ingress_skb, skb);
|
||||
schedule_work(&psock_other->work);
|
||||
break;
|
||||
}
|
||||
/* fall-through */
|
||||
case __SK_DROP:
|
||||
/* fall-through */
|
||||
default:
|
||||
out_free:
|
||||
kfree_skb(skb);
|
||||
}
|
||||
}
|
||||
|
||||
static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
|
||||
{
|
||||
struct sk_psock *psock = sk_psock_from_strp(strp);
|
||||
struct bpf_prog *prog;
|
||||
int ret = __SK_DROP;
|
||||
|
||||
rcu_read_lock();
|
||||
prog = READ_ONCE(psock->progs.skb_verdict);
|
||||
if (likely(prog)) {
|
||||
skb_orphan(skb);
|
||||
tcp_skb_bpf_redirect_clear(skb);
|
||||
ret = sk_psock_bpf_run(psock, prog, skb);
|
||||
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
|
||||
}
|
||||
rcu_read_unlock();
|
||||
sk_psock_verdict_apply(psock, skb, ret);
|
||||
}
|
||||
|
||||
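For context, the skb_verdict program that sk_psock_strp_read() runs is an ordinary sockmap verdict program. A minimal, hedged example follows; it assumes a libbpf-style loader that understands the "sk_skb/stream_verdict" section name, and the map name sock_map is illustrative only:

/* Illustrative BPF skb-verdict program for use with a sockmap. */
#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") sock_map = {
        .type        = BPF_MAP_TYPE_SOCKMAP,
        .key_size    = sizeof(int),
        .value_size  = sizeof(int),
        .max_entries = 2,
};

SEC("sk_skb/stream_verdict")
int example_skb_verdict(struct __sk_buff *skb)
{
        /* Redirect every parsed record to the ingress queue of the socket
         * stored at index 0; returning SK_DROP would free the skb instead,
         * and a plain SK_PASS delivers to the attached socket itself.
         */
        return bpf_sk_redirect_map(skb, &sock_map, 0, BPF_F_INGRESS);
}

char _license[] SEC("license") = "GPL";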
static int sk_psock_strp_read_done(struct strparser *strp, int err)
|
||||
{
|
||||
return err;
|
||||
}
|
||||
|
||||
static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
|
||||
{
|
||||
struct sk_psock *psock = sk_psock_from_strp(strp);
|
||||
struct bpf_prog *prog;
|
||||
int ret = skb->len;
|
||||
|
||||
rcu_read_lock();
|
||||
prog = READ_ONCE(psock->progs.skb_parser);
|
||||
if (likely(prog))
|
||||
ret = sk_psock_bpf_run(psock, prog, skb);
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Called with socket lock held. */
|
||||
static void sk_psock_data_ready(struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (likely(psock)) {
|
||||
write_lock_bh(&sk->sk_callback_lock);
|
||||
strp_data_ready(&psock->parser.strp);
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void sk_psock_write_space(struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
void (*write_space)(struct sock *sk);
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (likely(psock && sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)))
|
||||
schedule_work(&psock->work);
|
||||
write_space = psock->saved_write_space;
|
||||
rcu_read_unlock();
|
||||
write_space(sk);
|
||||
}
|
||||
|
||||
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
static const struct strp_callbacks cb = {
|
||||
.rcv_msg = sk_psock_strp_read,
|
||||
.read_sock_done = sk_psock_strp_read_done,
|
||||
.parse_msg = sk_psock_strp_parse,
|
||||
};
|
||||
|
||||
psock->parser.enabled = false;
|
||||
return strp_init(&psock->parser.strp, sk, &cb);
|
||||
}
|
||||
|
||||
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_parser *parser = &psock->parser;
|
||||
|
||||
if (parser->enabled)
|
||||
return;
|
||||
|
||||
parser->saved_data_ready = sk->sk_data_ready;
|
||||
sk->sk_data_ready = sk_psock_data_ready;
|
||||
sk->sk_write_space = sk_psock_write_space;
|
||||
parser->enabled = true;
|
||||
}
|
||||
|
||||
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_parser *parser = &psock->parser;
|
||||
|
||||
if (!parser->enabled)
|
||||
return;
|
||||
|
||||
sk->sk_data_ready = parser->saved_data_ready;
|
||||
parser->saved_data_ready = NULL;
|
||||
strp_stop(&parser->strp);
|
||||
parser->enabled = false;
|
||||
}
|
||||
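Before the (suppressed) sock_map.c diff below, it helps to see how the helpers in this file compose. The following is an illustrative sketch only; the real attach path lives in sock_map.c and additionally installs the BPF programs:

/* Illustrative only: the rough call order an attach path (e.g. sock_map.c,
 * whose diff is suppressed below) uses to wire a socket to these hooks.
 */
static int example_psock_attach(struct sock *sk)
{
        struct sk_psock *psock;
        int err;

        psock = sk_psock_init(sk, NUMA_NO_NODE);  /* sets sk_user_data, refcnt = 1 */
        if (!psock)
                return -ENOMEM;

        err = sk_psock_init_strp(sk, psock);      /* register strparser callbacks */
        if (err) {
                sk_psock_drop(sk, psock);         /* RCU + workqueue teardown */
                return err;
        }

        write_lock_bh(&sk->sk_callback_lock);
        sk_psock_start_strp(sk, psock);           /* take over sk_data_ready */
        write_unlock_bh(&sk->sk_callback_lock);
        return 0;
}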
net/core/sock_map.c: 1002 lines, new file (diff suppressed because it is too large)

@@ -63,6 +63,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o

obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \

@@ -543,6 +543,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
skb_ext_copy(to, from);
#if IS_ENABLED(CONFIG_IP_VS)
to->ipvs_property = from->ipvs_property;
#endif

net/ipv4/tcp_bpf.c: 655 lines, new file
@@ -0,0 +1,655 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
|
||||
|
||||
#include <linux/skmsg.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/wait.h>
|
||||
|
||||
#include <net/inet_common.h>
|
||||
|
||||
static bool tcp_bpf_stream_read(const struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
bool empty = true;
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (likely(psock))
|
||||
empty = list_empty(&psock->ingress_msg);
|
||||
rcu_read_unlock();
|
||||
return !empty;
|
||||
}
|
||||
|
||||
static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
|
||||
int flags, long timeo, int *err)
|
||||
{
|
||||
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
||||
int ret;
|
||||
|
||||
add_wait_queue(sk_sleep(sk), &wait);
|
||||
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
|
||||
ret = sk_wait_event(sk, &timeo,
|
||||
!list_empty(&psock->ingress_msg) ||
|
||||
!skb_queue_empty(&sk->sk_receive_queue), &wait);
|
||||
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
|
||||
remove_wait_queue(sk_sleep(sk), &wait);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
|
||||
struct msghdr *msg, int len)
|
||||
{
|
||||
struct iov_iter *iter = &msg->msg_iter;
|
||||
int i, ret, copied = 0;
|
||||
|
||||
while (copied != len) {
|
||||
struct scatterlist *sge;
|
||||
struct sk_msg *msg_rx;
|
||||
|
||||
msg_rx = list_first_entry_or_null(&psock->ingress_msg,
|
||||
struct sk_msg, list);
|
||||
if (unlikely(!msg_rx))
|
||||
break;
|
||||
|
||||
i = msg_rx->sg.start;
|
||||
do {
|
||||
struct page *page;
|
||||
int copy;
|
||||
|
||||
sge = sk_msg_elem(msg_rx, i);
|
||||
copy = sge->length;
|
||||
page = sg_page(sge);
|
||||
if (copied + copy > len)
|
||||
copy = len - copied;
|
||||
ret = copy_page_to_iter(page, sge->offset, copy, iter);
|
||||
if (ret != copy) {
|
||||
msg_rx->sg.start = i;
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
copied += copy;
|
||||
sge->offset += copy;
|
||||
sge->length -= copy;
|
||||
sk_mem_uncharge(sk, copy);
|
||||
if (!sge->length) {
|
||||
i++;
|
||||
if (i == MAX_SKB_FRAGS)
|
||||
i = 0;
|
||||
if (!msg_rx->skb)
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
if (copied == len)
|
||||
break;
|
||||
} while (i != msg_rx->sg.end);
|
||||
|
||||
msg_rx->sg.start = i;
|
||||
if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
|
||||
list_del(&msg_rx->list);
|
||||
if (msg_rx->skb)
|
||||
consume_skb(msg_rx->skb);
|
||||
kfree(msg_rx);
|
||||
}
|
||||
}
|
||||
|
||||
return copied;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg);
|
||||
|
||||
int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
||||
int nonblock, int flags, int *addr_len)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
int copied, ret;
|
||||
|
||||
if (unlikely(flags & MSG_ERRQUEUE))
|
||||
return inet_recv_error(sk, msg, len, addr_len);
|
||||
if (!skb_queue_empty(&sk->sk_receive_queue))
|
||||
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
|
||||
|
||||
psock = sk_psock_get(sk);
|
||||
if (unlikely(!psock))
|
||||
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
|
||||
lock_sock(sk);
|
||||
msg_bytes_ready:
|
||||
copied = __tcp_bpf_recvmsg(sk, psock, msg, len);
|
||||
if (!copied) {
|
||||
int data, err = 0;
|
||||
long timeo;
|
||||
|
||||
timeo = sock_rcvtimeo(sk, nonblock);
|
||||
data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
|
||||
if (data) {
|
||||
if (skb_queue_empty(&sk->sk_receive_queue))
|
||||
goto msg_bytes_ready;
|
||||
release_sock(sk);
|
||||
sk_psock_put(sk, psock);
|
||||
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
|
||||
}
|
||||
if (err) {
|
||||
ret = err;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = copied;
|
||||
out:
|
||||
release_sock(sk);
|
||||
sk_psock_put(sk, psock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
|
||||
struct sk_msg *msg, u32 apply_bytes, int flags)
|
||||
{
|
||||
bool apply = apply_bytes;
|
||||
struct scatterlist *sge;
|
||||
u32 size, copied = 0;
|
||||
struct sk_msg *tmp;
|
||||
int i, ret = 0;
|
||||
|
||||
tmp = kzalloc(sizeof(*tmp), __GFP_NOWARN | GFP_KERNEL);
|
||||
if (unlikely(!tmp))
|
||||
return -ENOMEM;
|
||||
|
||||
lock_sock(sk);
|
||||
tmp->sg.start = msg->sg.start;
|
||||
i = msg->sg.start;
|
||||
do {
|
||||
sge = sk_msg_elem(msg, i);
|
||||
size = (apply && apply_bytes < sge->length) ?
|
||||
apply_bytes : sge->length;
|
||||
if (!sk_wmem_schedule(sk, size)) {
|
||||
if (!copied)
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
sk_mem_charge(sk, size);
|
||||
sk_msg_xfer(tmp, msg, i, size);
|
||||
copied += size;
|
||||
if (sge->length)
|
||||
get_page(sk_msg_page(tmp, i));
|
||||
sk_msg_iter_var_next(i);
|
||||
tmp->sg.end = i;
|
||||
if (apply) {
|
||||
apply_bytes -= size;
|
||||
if (!apply_bytes)
|
||||
break;
|
||||
}
|
||||
} while (i != msg->sg.end);
|
||||
|
||||
if (!ret) {
|
||||
msg->sg.start = i;
|
||||
msg->sg.size -= apply_bytes;
|
||||
sk_psock_queue_msg(psock, tmp);
|
||||
sk->sk_data_ready(sk);
|
||||
} else {
|
||||
sk_msg_free(sk, tmp);
|
||||
kfree(tmp);
|
||||
}
|
||||
|
||||
release_sock(sk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
|
||||
int flags, bool uncharge)
|
||||
{
|
||||
bool apply = apply_bytes;
|
||||
struct scatterlist *sge;
|
||||
struct page *page;
|
||||
int size, ret = 0;
|
||||
u32 off;
|
||||
|
||||
while (1) {
|
||||
sge = sk_msg_elem(msg, msg->sg.start);
|
||||
size = (apply && apply_bytes < sge->length) ?
|
||||
apply_bytes : sge->length;
|
||||
off = sge->offset;
|
||||
page = sg_page(sge);
|
||||
|
||||
tcp_rate_check_app_limited(sk);
|
||||
retry:
|
||||
ret = do_tcp_sendpages(sk, page, off, size, flags);
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
if (apply)
|
||||
apply_bytes -= ret;
|
||||
msg->sg.size -= ret;
|
||||
sge->offset += ret;
|
||||
sge->length -= ret;
|
||||
if (uncharge)
|
||||
sk_mem_uncharge(sk, ret);
|
||||
if (ret != size) {
|
||||
size -= ret;
|
||||
off += ret;
|
||||
goto retry;
|
||||
}
|
||||
if (!sge->length) {
|
||||
put_page(page);
|
||||
sk_msg_iter_next(msg, start);
|
||||
sg_init_table(sge, 1);
|
||||
if (msg->sg.start == msg->sg.end)
|
||||
break;
|
||||
}
|
||||
if (apply && !apply_bytes)
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg,
|
||||
u32 apply_bytes, int flags, bool uncharge)
|
||||
{
|
||||
int ret;
|
||||
|
||||
lock_sock(sk);
|
||||
ret = tcp_bpf_push(sk, msg, apply_bytes, flags, uncharge);
|
||||
release_sock(sk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
|
||||
u32 bytes, int flags)
|
||||
{
|
||||
bool ingress = sk_msg_to_ingress(msg);
|
||||
struct sk_psock *psock = sk_psock_get(sk);
|
||||
int ret;
|
||||
|
||||
if (unlikely(!psock)) {
|
||||
sk_msg_free(sk, msg);
|
||||
return 0;
|
||||
}
|
||||
ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
|
||||
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
|
||||
sk_psock_put(sk, psock);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
|
||||
|
||||
static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
|
||||
struct sk_msg *msg, int *copied, int flags)
|
||||
{
|
||||
bool cork = false, enospc = msg->sg.start == msg->sg.end;
|
||||
struct sock *sk_redir;
|
||||
u32 tosend;
|
||||
int ret;
|
||||
|
||||
more_data:
|
||||
if (psock->eval == __SK_NONE)
|
||||
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
|
||||
|
||||
if (msg->cork_bytes &&
|
||||
msg->cork_bytes > msg->sg.size && !enospc) {
|
||||
psock->cork_bytes = msg->cork_bytes - msg->sg.size;
|
||||
if (!psock->cork) {
|
||||
psock->cork = kzalloc(sizeof(*psock->cork),
|
||||
GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (!psock->cork)
|
||||
return -ENOMEM;
|
||||
}
|
||||
memcpy(psock->cork, msg, sizeof(*msg));
|
||||
return 0;
|
||||
}
|
||||
|
||||
tosend = msg->sg.size;
|
||||
if (psock->apply_bytes && psock->apply_bytes < tosend)
|
||||
tosend = psock->apply_bytes;
|
||||
|
||||
switch (psock->eval) {
|
||||
case __SK_PASS:
|
||||
ret = tcp_bpf_push(sk, msg, tosend, flags, true);
|
||||
if (unlikely(ret)) {
|
||||
*copied -= sk_msg_free(sk, msg);
|
||||
break;
|
||||
}
|
||||
sk_msg_apply_bytes(psock, tosend);
|
||||
break;
|
||||
case __SK_REDIRECT:
|
||||
sk_redir = psock->sk_redir;
|
||||
sk_msg_apply_bytes(psock, tosend);
|
||||
if (psock->cork) {
|
||||
cork = true;
|
||||
psock->cork = NULL;
|
||||
}
|
||||
sk_msg_return(sk, msg, tosend);
|
||||
release_sock(sk);
|
||||
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
|
||||
lock_sock(sk);
|
||||
if (unlikely(ret < 0)) {
|
||||
int free = sk_msg_free_nocharge(sk, msg);
|
||||
|
||||
if (!cork)
|
||||
*copied -= free;
|
||||
}
|
||||
if (cork) {
|
||||
sk_msg_free(sk, msg);
|
||||
kfree(msg);
|
||||
msg = NULL;
|
||||
ret = 0;
|
||||
}
|
||||
break;
|
||||
case __SK_DROP:
|
||||
default:
|
||||
sk_msg_free_partial(sk, msg, tosend);
|
||||
sk_msg_apply_bytes(psock, tosend);
|
||||
*copied -= tosend;
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
if (likely(!ret)) {
|
||||
if (!psock->apply_bytes) {
|
||||
psock->eval = __SK_NONE;
|
||||
if (psock->sk_redir) {
|
||||
sock_put(psock->sk_redir);
|
||||
psock->sk_redir = NULL;
|
||||
}
|
||||
}
|
||||
if (msg &&
|
||||
msg->sg.data[msg->sg.start].page_link &&
|
||||
msg->sg.data[msg->sg.start].length)
|
||||
goto more_data;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
|
||||
{
|
||||
struct sk_msg tmp, *msg_tx = NULL;
|
||||
int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
|
||||
int copied = 0, err = 0;
|
||||
struct sk_psock *psock;
|
||||
long timeo;
|
||||
|
||||
psock = sk_psock_get(sk);
|
||||
if (unlikely(!psock))
|
||||
return tcp_sendmsg(sk, msg, size);
|
||||
|
||||
lock_sock(sk);
|
||||
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
|
||||
while (msg_data_left(msg)) {
|
||||
bool enospc = false;
|
||||
u32 copy, osize;
|
||||
|
||||
if (sk->sk_err) {
|
||||
err = -sk->sk_err;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
copy = msg_data_left(msg);
|
||||
if (!sk_stream_memory_free(sk))
|
||||
goto wait_for_sndbuf;
|
||||
if (psock->cork) {
|
||||
msg_tx = psock->cork;
|
||||
} else {
|
||||
msg_tx = &tmp;
|
||||
sk_msg_init(msg_tx);
|
||||
}
|
||||
|
||||
osize = msg_tx->sg.size;
|
||||
err = sk_msg_alloc(sk, msg_tx, msg_tx->sg.size + copy, msg_tx->sg.end - 1);
|
||||
if (err) {
|
||||
if (err != -ENOSPC)
|
||||
goto wait_for_memory;
|
||||
enospc = true;
|
||||
copy = msg_tx->sg.size - osize;
|
||||
}
|
||||
|
||||
err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx,
|
||||
copy);
|
||||
if (err < 0) {
|
||||
sk_msg_trim(sk, msg_tx, osize);
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
copied += copy;
|
||||
if (psock->cork_bytes) {
|
||||
if (size > psock->cork_bytes)
|
||||
psock->cork_bytes = 0;
|
||||
else
|
||||
psock->cork_bytes -= size;
|
||||
if (psock->cork_bytes && !enospc)
|
||||
goto out_err;
|
||||
/* All cork bytes are accounted, rerun the prog. */
|
||||
psock->eval = __SK_NONE;
|
||||
psock->cork_bytes = 0;
|
||||
}
|
||||
|
||||
err = tcp_bpf_send_verdict(sk, psock, msg_tx, &copied, flags);
|
||||
if (unlikely(err < 0))
|
||||
goto out_err;
|
||||
continue;
|
||||
wait_for_sndbuf:
|
||||
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
|
||||
wait_for_memory:
|
||||
err = sk_stream_wait_memory(sk, &timeo);
|
||||
if (err) {
|
||||
if (msg_tx && msg_tx != psock->cork)
|
||||
sk_msg_free(sk, msg_tx);
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
out_err:
|
||||
if (err < 0)
|
||||
err = sk_stream_error(sk, msg->msg_flags, err);
|
||||
release_sock(sk);
|
||||
sk_psock_put(sk, psock);
|
||||
return copied ? copied : err;
|
||||
}
|
||||
|
||||
static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset,
|
||||
size_t size, int flags)
|
||||
{
|
||||
struct sk_msg tmp, *msg = NULL;
|
||||
int err = 0, copied = 0;
|
||||
struct sk_psock *psock;
|
||||
bool enospc = false;
|
||||
|
||||
psock = sk_psock_get(sk);
|
||||
if (unlikely(!psock))
|
||||
return tcp_sendpage(sk, page, offset, size, flags);
|
||||
|
||||
lock_sock(sk);
|
||||
if (psock->cork) {
|
||||
msg = psock->cork;
|
||||
} else {
|
||||
msg = &tmp;
|
||||
sk_msg_init(msg);
|
||||
}
|
||||
|
||||
/* Catch case where ring is full and sendpage is stalled. */
|
||||
if (unlikely(sk_msg_full(msg)))
|
||||
goto out_err;
|
||||
|
||||
sk_msg_page_add(msg, page, size, offset);
|
||||
sk_mem_charge(sk, size);
|
||||
copied = size;
|
||||
if (sk_msg_full(msg))
|
||||
enospc = true;
|
||||
if (psock->cork_bytes) {
|
||||
if (size > psock->cork_bytes)
|
||||
psock->cork_bytes = 0;
|
||||
else
|
||||
psock->cork_bytes -= size;
|
||||
if (psock->cork_bytes && !enospc)
|
||||
goto out_err;
|
||||
/* All cork bytes are accounted, rerun the prog. */
|
||||
psock->eval = __SK_NONE;
|
||||
psock->cork_bytes = 0;
|
||||
}
|
||||
|
||||
err = tcp_bpf_send_verdict(sk, psock, msg, &copied, flags);
|
||||
out_err:
|
||||
release_sock(sk);
|
||||
sk_psock_put(sk, psock);
|
||||
return copied ? copied : err;
|
||||
}
|
||||
|
||||
static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
struct sk_psock_link *link;
|
||||
|
||||
sk_psock_cork_free(psock);
|
||||
__sk_psock_purge_ingress_msg(psock);
|
||||
while ((link = sk_psock_link_pop(psock))) {
|
||||
sk_psock_unlink(sk, link);
|
||||
sk_psock_free_link(link);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcp_bpf_unhash(struct sock *sk)
|
||||
{
|
||||
void (*saved_unhash)(struct sock *sk);
|
||||
struct sk_psock *psock;
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (unlikely(!psock)) {
|
||||
rcu_read_unlock();
|
||||
if (sk->sk_prot->unhash)
|
||||
sk->sk_prot->unhash(sk);
|
||||
return;
|
||||
}
|
||||
|
||||
saved_unhash = psock->saved_unhash;
|
||||
tcp_bpf_remove(sk, psock);
|
||||
rcu_read_unlock();
|
||||
saved_unhash(sk);
|
||||
}
|
||||
|
||||
static void tcp_bpf_close(struct sock *sk, long timeout)
|
||||
{
|
||||
void (*saved_close)(struct sock *sk, long timeout);
|
||||
struct sk_psock *psock;
|
||||
|
||||
lock_sock(sk);
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (unlikely(!psock)) {
|
||||
rcu_read_unlock();
|
||||
release_sock(sk);
|
||||
return sk->sk_prot->close(sk, timeout);
|
||||
}
|
||||
|
||||
saved_close = psock->saved_close;
|
||||
tcp_bpf_remove(sk, psock);
|
||||
rcu_read_unlock();
|
||||
release_sock(sk);
|
||||
saved_close(sk, timeout);
|
||||
}
|
||||
|
||||
enum {
|
||||
TCP_BPF_IPV4,
|
||||
TCP_BPF_IPV6,
|
||||
TCP_BPF_NUM_PROTS,
|
||||
};
|
||||
|
||||
enum {
|
||||
TCP_BPF_BASE,
|
||||
TCP_BPF_TX,
|
||||
TCP_BPF_NUM_CFGS,
|
||||
};
|
||||
|
||||
static struct proto *tcpv6_prot_saved __read_mostly;
|
||||
static DEFINE_SPINLOCK(tcpv6_prot_lock);
|
||||
static struct proto tcp_bpf_prots[TCP_BPF_NUM_PROTS][TCP_BPF_NUM_CFGS];
|
||||
|
||||
static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
|
||||
struct proto *base)
|
||||
{
|
||||
prot[TCP_BPF_BASE] = *base;
|
||||
prot[TCP_BPF_BASE].unhash = tcp_bpf_unhash;
|
||||
prot[TCP_BPF_BASE].close = tcp_bpf_close;
|
||||
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
|
||||
prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read;
|
||||
|
||||
prot[TCP_BPF_TX] = prot[TCP_BPF_BASE];
|
||||
prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg;
|
||||
prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage;
|
||||
}
|
||||
|
||||
static void tcp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
|
||||
{
|
||||
if (sk->sk_family == AF_INET6 &&
|
||||
unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
|
||||
spin_lock_bh(&tcpv6_prot_lock);
|
||||
if (likely(ops != tcpv6_prot_saved)) {
|
||||
tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV6], ops);
|
||||
smp_store_release(&tcpv6_prot_saved, ops);
|
||||
}
|
||||
spin_unlock_bh(&tcpv6_prot_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static int __init tcp_bpf_v4_build_proto(void)
|
||||
{
|
||||
tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot);
|
||||
return 0;
|
||||
}
|
||||
core_initcall(tcp_bpf_v4_build_proto);
|
||||
|
||||
static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
|
||||
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
|
||||
|
||||
sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]);
|
||||
}
|
||||
|
||||
static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
|
||||
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
|
||||
|
||||
/* Reinit occurs when program types change, e.g. TCP_BPF_TX is removed
|
||||
* or added requiring sk_prot hook updates. We keep original saved
|
||||
* hooks in this case.
|
||||
*/
|
||||
sk->sk_prot = &tcp_bpf_prots[family][config];
|
||||
}
|
||||
|
||||
static int tcp_bpf_assert_proto_ops(struct proto *ops)
|
||||
{
|
||||
/* In order to avoid retpoline, we make assumptions when we call
|
||||
* into ops if e.g. a psock is not present. Make sure they are
|
||||
* indeed valid assumptions.
|
||||
*/
|
||||
return ops->recvmsg == tcp_recvmsg &&
|
||||
ops->sendmsg == tcp_sendmsg &&
|
||||
ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
|
||||
}
|
||||
|
||||
void tcp_bpf_reinit(struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
|
||||
sock_owned_by_me(sk);
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
tcp_bpf_reinit_sk_prot(sk, psock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
int tcp_bpf_init(struct sock *sk)
|
||||
{
|
||||
struct proto *ops = READ_ONCE(sk->sk_prot);
|
||||
struct sk_psock *psock;
|
||||
|
||||
sock_owned_by_me(sk);
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (unlikely(!psock || psock->sk_proto ||
|
||||
tcp_bpf_assert_proto_ops(ops))) {
|
||||
rcu_read_unlock();
|
||||
return -EINVAL;
|
||||
}
|
||||
tcp_bpf_check_v6_needs_rebuild(sk, ops);
|
||||
tcp_bpf_update_sk_prot(sk, psock);
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
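The [family][config] proto table above means switching a socket in and out of BPF handling is a single ops-pointer update. A small illustrative sketch of the attach-side call order (the real caller is the sockmap update path; tcp_bpf_init() requires an already-attached psock and asserts sock_owned_by_me()):

/* Illustrative caller of tcp_bpf_init(): pick the BPF-aware proto ops for
 * a TCP socket that already has a psock attached. Must run with the
 * socket owned, hence the lock_sock()/release_sock() pair.
 */
static int example_enable_tcp_bpf(struct sock *sk)
{
        int err;

        lock_sock(sk);
        err = tcp_bpf_init(sk); /* swaps sk->sk_prot to tcp_bpf_prots[family][config] */
        release_sock(sk);
        return err;
}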
@@ -621,6 +621,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
skb_ext_copy(to, from);
skb_copy_secmark(to, from);
}

@@ -229,7 +229,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
}
#ifdef CONFIG_XFRM
case NFT_META_SECPATH:
nft_reg_store8(dest, !!skb->sp);
nft_reg_store8(dest, secpath_exists(skb));
break;
#endif
#ifdef CONFIG_NF_TABLES_BRIDGE

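The nft_meta hunk above swaps a direct skb->sp test for the secpath_exists() accessor, which keeps working once the secure path is carried as an skb extension. A hedged sketch of the same conversion pattern applied to a hypothetical call site:

/* Hypothetical call site showing the conversion this hunk performs:
 * test the accessor instead of dereferencing skb->sp directly.
 */
static bool example_skb_was_ipsec_transformed(struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
        return secpath_exists(skb);   /* previously: return skb->sp != NULL; */
#else
        return false;
#endif
}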
net/socket.c: 30 changed lines
@@ -2045,6 +2045,8 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
static int __sys_setsockopt(int fd, int level, int optname,
char __user *optval, int optlen)
{
mm_segment_t oldfs = get_fs();
char *kernel_optval = NULL;
int err, fput_needed;
struct socket *sock;

@@ -2057,6 +2059,20 @@ static int __sys_setsockopt(int fd, int level, int optname,
if (err)
goto out_put;

err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
&optname, optval, &optlen,
&kernel_optval);
if (err < 0) {
goto out_put;
} else if (err > 0) {
err = 0;
goto out_put;
}
if (kernel_optval) {
set_fs(KERNEL_DS);
optval = (char __user __force *)kernel_optval;
}

if (level == SOL_SOCKET)
err =
sock_setsockopt(sock, level, optname, optval,
@@ -2065,6 +2081,12 @@ static int __sys_setsockopt(int fd, int level, int optname,
err =
sock->ops->setsockopt(sock, level, optname, optval,
optlen);

if (kernel_optval) {
set_fs(oldfs);
kfree(kernel_optval);
}

out_put:
fput_light(sock->file, fput_needed);
}
@@ -2087,6 +2109,7 @@ static int __sys_getsockopt(int fd, int level, int optname,
{
int err, fput_needed;
struct socket *sock;
int max_optlen;

sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
@@ -2094,6 +2117,8 @@ static int __sys_getsockopt(int fd, int level, int optname,
if (err)
goto out_put;

max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);

if (level == SOL_SOCKET)
err =
sock_getsockopt(sock, level, optname, optval,
@@ -2102,6 +2127,11 @@ static int __sys_getsockopt(int fd, int level, int optname,
err =
sock->ops->getsockopt(sock, level, optname, optval,
optlen);

err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
optval, optlen,
max_optlen, err);

out_put:
fput_light(sock->file, fput_needed);
}

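Read together, the __sys_setsockopt() hunks above add a cgroup-BPF pre-hook that may reject the call, consume it, or rewrite the option value into a kernel buffer; in the rewrite case the address limit is raised so the unchanged setsockopt paths can read the kernel pointer. A condensed, illustrative restatement of that control flow (not the patched function itself; the SOL_SOCKET branch is folded into one call for brevity):

/* Condensed restatement of the patched __sys_setsockopt() flow. */
static int example_setsockopt_flow(struct socket *sock, int level, int optname,
                                   char __user *optval, int optlen)
{
        mm_segment_t oldfs = get_fs();
        char *kernel_optval = NULL;
        int err;

        err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
                                             optval, &optlen, &kernel_optval);
        if (err < 0)
                return err;             /* program rejected the call */
        if (err > 0)
                return 0;               /* program handled it entirely */

        if (kernel_optval) {            /* program rewrote the value */
                set_fs(KERNEL_DS);
                optval = (char __user __force *)kernel_optval;
        }

        err = sock->ops->setsockopt(sock, level, optname, optval, optlen);

        if (kernel_optval) {
                set_fs(oldfs);
                kfree(kernel_optval);
        }
        return err;
}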
@@ -1,4 +1,2 @@

config STREAM_PARSER
tristate
default n
def_bool n

@@ -325,8 +325,12 @@ static int dsi_phy_settings_init(struct platform_device *pdev,
/* Actual timing values are dependent on panel */
timing->count_per_lane = phy->ver_info->timing_cfg_count;

#ifndef CONFIG_MACH_XIAOMI_UMI
phy->allow_phy_power_off = of_property_read_bool(pdev->dev.of_node,
"qcom,panel-allow-phy-poweroff");
#else
phy->allow_phy_power_off = false;
#endif

of_property_read_u32(pdev->dev.of_node,
"qcom,dsi-phy-regulator-min-datarate-bps",

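The net effect of the #ifndef above: on CONFIG_MACH_XIAOMI_UMI builds the PHY power-off permission is hard-wired to false, while every other target keeps reading it from devicetree. An illustrative helper expressing the same decision:

/* Effective value of allow_phy_power_off after this hunk:
 *   CONFIG_MACH_XIAOMI_UMI=n : taken from the "qcom,panel-allow-phy-poweroff"
 *                              DT property, as before.
 *   CONFIG_MACH_XIAOMI_UMI=y : hard-coded to false, the DT property is ignored.
 */
static bool example_allow_phy_power_off(struct platform_device *pdev)
{
#ifndef CONFIG_MACH_XIAOMI_UMI
        return of_property_read_bool(pdev->dev.of_node,
                                     "qcom,panel-allow-phy-poweroff");
#else
        return false;
#endif
}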