After the jump rule is added for 3 to 5 times, the system freezes and even softlockup occurs. for((i=1;i<15;i++));do nft add rule inet $table test$i jump test$((i+1));done for((i=1;i<15;i++));do nft add rule inet $table test$i jump test$((i+1));done for((i=1;i<15;i++));do nft add rule inet $table test$i jump test$((i+1));done This patch is a different approach than the original proposed approach found in the bug report. Add a new counter, total_jump_counter, to nft_ctx. On every call to nft_table_validate() (rule addition time, versus packet inspection time) start the counter at 0. Increment said counter for every jump encountered during table validation. If the counter ever exceeds the system's jump limit *during validation*, gracefully reject the rule with -EMLINK (the same behavior as exceeding NFT_JUMP_STACK_SIZE). This allows immediate feedback to the user about a bad chain, versus the original idea (from the bug report) of allowing the addition to the table. It keeps the in memory ruleset consistent, versus catching the failure during packet inspection at some unknown point in the future and arbitrarily denying the packet. Most importantly, it removes the original problem of a kernel crash. The compile time limit NFT_DEFAULT_MAX_TABLE_JUMPS of 8192 was chosen to account for any normal use case, and when this value (and associated stressing loop table) was tested against a 1CPU/256MB machine, the system remained functional. A sysctl entry net/netfilter/nf_max_table_jumps for the limit was also added for any use cases that may exceed this limit. As it is a control limit, it is only available in the root network namespace. Example output of nft when patch is applied (and count is reached): Error: Could not process rule: Too many links add rule inet loop-test test14 jump test15 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Signed-off-by: Shaun Brady <brady.1345@xxxxxxxxx> Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1665 --- Documentation/networking/netfilter-sysctl.rst | 8 ++ include/net/netfilter/nf_tables.h | 1 + include/net/netns/netfilter.h | 1 + net/netfilter/nf_tables_api.c | 75 ++++++++++++++++++- net/netfilter/nft_immediate.c | 1 + 5 files changed, 85 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/netfilter-sysctl.rst b/Documentation/networking/netfilter-sysctl.rst index beb6d7b275d4..d957272c8ae6 100644 --- a/Documentation/networking/netfilter-sysctl.rst +++ b/Documentation/networking/netfilter-sysctl.rst @@ -15,3 +15,11 @@ nf_log_all_netns - BOOLEAN with LOG target; this aims to prevent containers from flooding host kernel log. If enabled, this target also works in other network namespaces. This variable is only accessible from init_net. + +nf_max_table_jumps - INTEGER (count) + default 8192 + + The maximum numbers of jumps a table can contain. Meeting or exceeding + this value will cause additional rules to not be added with + EMLINK being return to the user. This variable is only accessible from + init_net. diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0027beca5cd5..9c9473c37f08 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -218,6 +218,7 @@ struct nft_ctx { const struct nlattr * const *nla; u32 portid; u32 seq; + u32 total_jump_count; u16 flags; u8 family; u8 level; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index a6a0bf4a247e..cbb4f672d21f 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -15,6 +15,7 @@ struct netns_nf { const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; + struct ctl_table_header *nf_limit_control_dir_header; #ifdef CONFIG_LWTUNNEL struct ctl_table_header *nf_lwtnl_dir_header; #endif diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f7ca7165e66e..e70c6cb7d90a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -16,6 +16,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nf_tables.h> +#include <linux/sysctl.h> #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> @@ -25,10 +26,14 @@ #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) #define NFT_SET_MAX_ANONLEN 16 +#define NFT_DEFAULT_MAX_TABLE_JUMPS 8192 /* limit compaction to avoid huge kmalloc/krealloc sizes. */ #define NFT_MAX_SET_NELEMS ((2048 - sizeof(struct nft_trans_elem)) / sizeof(struct nft_trans_one_elem)) +u32 sysctl_nf_max_table_jumps __read_mostly = NFT_DEFAULT_MAX_TABLE_JUMPS; +EXPORT_SYMBOL(sysctl_nf_max_table_jumps); + unsigned int nf_tables_net_id __read_mostly; static LIST_HEAD(nf_tables_expressions); @@ -4009,7 +4014,8 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) struct nft_rule *rule; int err; - if (ctx->level == NFT_JUMP_STACK_SIZE) + if (ctx->level == NFT_JUMP_STACK_SIZE || + ctx->total_jump_count >= sysctl_nf_max_table_jumps) return -EMLINK; list_for_each_entry(rule, &chain->rules, list) { @@ -4042,6 +4048,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) struct nft_ctx ctx = { .net = net, .family = table->family, + .total_jump_count = 0, }; int err; @@ -4081,6 +4088,7 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, case NFT_JUMP: case NFT_GOTO: pctx->level++; + pctx->total_jump_count++; err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; @@ -11875,6 +11883,67 @@ static struct notifier_block nft_nl_notifier = { .notifier_call = nft_rcv_nl_event, }; +#ifdef CONFIG_SYSCTL +static struct ctl_table nf_limit_control_sysctl_table[] = { + { + .procname = "nf_max_table_jumps", + .data = &sysctl_nf_max_table_jumps, + .maxlen = sizeof(sysctl_nf_max_table_jumps), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static int netfilter_limit_control_sysctl_init(struct net *net) +{ + if (net_eq(net, &init_net)) { + net->nf.nf_limit_control_dir_header = register_net_sysctl( + net, + "net/netfilter", + nf_limit_control_sysctl_table); + if (!net->nf.nf_limit_control_dir_header) + goto err_alloc; + } + return 0; + +err_alloc: + return -ENOMEM; +} + +static void netfilter_limit_control_sysctl_exit(struct net *net) +{ + unregister_net_sysctl_table(net->nf.nf_limit_control_dir_header); +} +#else +static int netfilter_limit_control_sysctl_init(struct net *net) +{ + return 0; +} + +static void netfilter_limit_control_sysctl_exit(struct net *net) +{ +} +#endif /* CONFIG_SYSCTL */ + +static int __net_init nf_limit_control_net_init(struct net *net) +{ + int ret = netfilter_limit_control_sysctl_init(net); + + if (ret < 0) + return ret; + return 0; +} + +static void __net_exit nf_limit_control_net_exit(struct net *net) +{ + netfilter_limit_control_sysctl_exit(net); +} + +static struct pernet_operations nf_limit_control_net_ops = { + .init = nf_limit_control_net_init, + .exit = nf_limit_control_net_exit, +}; + static int __net_init nf_tables_init_net(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); @@ -11957,6 +12026,10 @@ static int __init nf_tables_module_init(void) if (err < 0) return err; + err = register_pernet_subsys(&nf_limit_control_net_ops); + if (err < 0) + return err; + err = nft_chain_filter_init(); if (err < 0) goto err_chain_filter; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 02ee5fb69871..b21736e389a4 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -260,6 +260,7 @@ static int nft_immediate_validate(const struct nft_ctx *ctx, case NFT_JUMP: case NFT_GOTO: pctx->level++; + pctx->total_jump_count++; err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; -- 2.49.0