sysctl_net_core.c (8.6 KB)
  1. /* -*- linux-c -*-
  2. * sysctl_net_core.c: sysctl interface to net core subsystem.
  3. *
  4. * Begun April 1, 1996, Mike Shaver.
  5. * Added /proc/sys/net/core directory entry (empty =) ). [MS]
  6. */
  7. #include <linux/mm.h>
  8. #include <linux/sysctl.h>
  9. #include <linux/module.h>
  10. #include <linux/socket.h>
  11. #include <linux/netdevice.h>
  12. #include <linux/ratelimit.h>
  13. #include <linux/vmalloc.h>
  14. #include <linux/init.h>
  15. #include <linux/slab.h>
  16. #include <linux/kmemleak.h>
  17. #include <net/ip.h>
  18. #include <net/sock.h>
  19. #include <net/net_ratelimit.h>
  20. #include <net/busy_poll.h>
/* Lower bound shared by the wmem/rmem sysctls below via .extra1 with
 * proc_dointvec_minmax, so userspace cannot write values below 1. */
static int one = 1;
#ifdef CONFIG_RPS
/*
 * Handler for /proc/sys/net/core/rps_sock_flow_entries.
 *
 * Read: reports the current global RFS sock flow table size (0 when no
 * table is installed).
 * Write: resizes the table to the next power of two >= the written value,
 * or frees it entirely when 0 is written.
 *
 * Returns 0 on success, -EINVAL for oversized requests, -ENOMEM on
 * allocation failure, or whatever proc_dointvec() returned.
 */
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	/* Proxy ctl_table so proc_dointvec() reads/writes the local 'size'
	 * variable; this sysctl entry itself has no .data backing. */
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	/* Serializes concurrent resizes; lookups elsewhere use RCU. */
	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	/* mask is entries-1, so mask+1 recovers the (power-of-two) size. */
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<30) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}

				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			/* Reinitialize every entry even when the table is
			 * reused, flushing any stale CPU hints. */
			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(rps_sock_flow_table, sock_table);
			/* Keep the rps_needed static key balanced: +1 when a
			 * table is installed, -1 when one is removed. */
			if (sock_table)
				static_key_slow_inc(&rps_needed);
			if (orig_sock_table) {
				static_key_slow_dec(&rps_needed);
				/* Wait for in-flight RCU readers before
				 * freeing the old table. */
				synchronize_rcu();
				vfree(orig_sock_table);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */
#ifdef CONFIG_NET_FLOW_LIMIT
/* Protects installation/removal of the per-cpu softnet_data->flow_limit
 * tables and updates to netdev_flow_limit_table_len. */
static DEFINE_MUTEX(flow_limit_update_mutex);

/*
 * Handler for /proc/sys/net/core/flow_limit_cpu_bitmap.
 *
 * Write: parse a cpumask from userspace and (de)allocate the per-cpu
 * sd_flow_limit state so that exactly the CPUs in the mask have flow
 * limiting enabled.
 * Read: report the mask of CPUs that currently have a flow_limit table.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -EFAULT, or a
 * cpumask parse error).
 */
static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp,
				 loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse_user(buffer, *lenp, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		/* Struct header plus trailing per-bucket data sized by the
		 * current table length. */
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				/* CPU left the mask: unpublish its table and
				 * free it after RCU readers are done. */
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				synchronize_rcu();
				kfree(cur);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc(len, GFP_KERNEL);
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->num_buckets = netdev_flow_limit_table_len;
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		char kbuf[128];

		/* Single-shot read: data only at offset 0, EOF afterwards. */
		if (*ppos || !*lenp) {
			*lenp = 0;
			goto done;
		}

		/* Rebuild the mask from the live per-cpu state. */
		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		len = min(sizeof(kbuf) - 1, *lenp);
		len = cpumask_scnprintf(kbuf, len, mask);
		if (!len) {
			*lenp = 0;
			goto done;
		}
		/* Append a trailing newline when there is room for it. */
		if (len < *lenp)
			kbuf[len++] = '\n';
		if (copy_to_user(buffer, kbuf, len)) {
			ret = -EFAULT;
			goto done;
		}
		*lenp = len;
		*ppos += len;
	}

done:
	free_cpumask_var(mask);
	return ret;
}
  149. static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
  150. void __user *buffer, size_t *lenp,
  151. loff_t *ppos)
  152. {
  153. unsigned int old, *ptr;
  154. int ret;
  155. mutex_lock(&flow_limit_update_mutex);
  156. ptr = table->data;
  157. old = *ptr;
  158. ret = proc_dointvec(table, write, buffer, lenp, ppos);
  159. if (!ret && write && !is_power_of_2(*ptr)) {
  160. *ptr = old;
  161. ret = -EINVAL;
  162. }
  163. mutex_unlock(&flow_limit_update_mutex);
  164. return ret;
  165. }
  166. #endif /* CONFIG_NET_FLOW_LIMIT */
  167. static struct ctl_table net_core_table[] = {
  168. #ifdef CONFIG_NET
  169. {
  170. .procname = "wmem_max",
  171. .data = &sysctl_wmem_max,
  172. .maxlen = sizeof(int),
  173. .mode = 0644,
  174. .proc_handler = proc_dointvec_minmax,
  175. .extra1 = &one,
  176. },
  177. {
  178. .procname = "rmem_max",
  179. .data = &sysctl_rmem_max,
  180. .maxlen = sizeof(int),
  181. .mode = 0644,
  182. .proc_handler = proc_dointvec_minmax,
  183. .extra1 = &one,
  184. },
  185. {
  186. .procname = "wmem_default",
  187. .data = &sysctl_wmem_default,
  188. .maxlen = sizeof(int),
  189. .mode = 0644,
  190. .proc_handler = proc_dointvec_minmax,
  191. .extra1 = &one,
  192. },
  193. {
  194. .procname = "rmem_default",
  195. .data = &sysctl_rmem_default,
  196. .maxlen = sizeof(int),
  197. .mode = 0644,
  198. .proc_handler = proc_dointvec_minmax,
  199. .extra1 = &one,
  200. },
  201. {
  202. .procname = "dev_weight",
  203. .data = &weight_p,
  204. .maxlen = sizeof(int),
  205. .mode = 0644,
  206. .proc_handler = proc_dointvec
  207. },
  208. {
  209. .procname = "netdev_max_backlog",
  210. .data = &netdev_max_backlog,
  211. .maxlen = sizeof(int),
  212. .mode = 0644,
  213. .proc_handler = proc_dointvec
  214. },
  215. #ifdef CONFIG_BPF_JIT
  216. {
  217. .procname = "bpf_jit_enable",
  218. .data = &bpf_jit_enable,
  219. .maxlen = sizeof(int),
  220. .mode = 0644,
  221. .proc_handler = proc_dointvec
  222. },
  223. #endif
  224. {
  225. .procname = "netdev_tstamp_prequeue",
  226. .data = &netdev_tstamp_prequeue,
  227. .maxlen = sizeof(int),
  228. .mode = 0644,
  229. .proc_handler = proc_dointvec
  230. },
  231. {
  232. .procname = "message_cost",
  233. .data = &net_ratelimit_state.interval,
  234. .maxlen = sizeof(int),
  235. .mode = 0644,
  236. .proc_handler = proc_dointvec_jiffies,
  237. },
  238. {
  239. .procname = "message_burst",
  240. .data = &net_ratelimit_state.burst,
  241. .maxlen = sizeof(int),
  242. .mode = 0644,
  243. .proc_handler = proc_dointvec,
  244. },
  245. {
  246. .procname = "optmem_max",
  247. .data = &sysctl_optmem_max,
  248. .maxlen = sizeof(int),
  249. .mode = 0644,
  250. .proc_handler = proc_dointvec
  251. },
  252. #ifdef CONFIG_RPS
  253. {
  254. .procname = "rps_sock_flow_entries",
  255. .maxlen = sizeof(int),
  256. .mode = 0644,
  257. .proc_handler = rps_sock_flow_sysctl
  258. },
  259. #endif
  260. #ifdef CONFIG_NET_FLOW_LIMIT
  261. {
  262. .procname = "flow_limit_cpu_bitmap",
  263. .mode = 0644,
  264. .proc_handler = flow_limit_cpu_sysctl
  265. },
  266. {
  267. .procname = "flow_limit_table_len",
  268. .data = &netdev_flow_limit_table_len,
  269. .maxlen = sizeof(int),
  270. .mode = 0644,
  271. .proc_handler = flow_limit_table_len_sysctl
  272. },
  273. #endif /* CONFIG_NET_FLOW_LIMIT */
  274. #ifdef CONFIG_NET_LL_RX_POLL
  275. {
  276. .procname = "low_latency_poll",
  277. .data = &sysctl_net_ll_poll,
  278. .maxlen = sizeof(unsigned int),
  279. .mode = 0644,
  280. .proc_handler = proc_dointvec
  281. },
  282. {
  283. .procname = "low_latency_read",
  284. .data = &sysctl_net_ll_read,
  285. .maxlen = sizeof(unsigned int),
  286. .mode = 0644,
  287. .proc_handler = proc_dointvec
  288. },
  289. #
  290. #endif
  291. #endif /* CONFIG_NET */
  292. {
  293. .procname = "netdev_budget",
  294. .data = &netdev_budget,
  295. .maxlen = sizeof(int),
  296. .mode = 0644,
  297. .proc_handler = proc_dointvec
  298. },
  299. {
  300. .procname = "warnings",
  301. .data = &net_msg_warn,
  302. .maxlen = sizeof(int),
  303. .mode = 0644,
  304. .proc_handler = proc_dointvec
  305. },
  306. { }
  307. };
/*
 * Per-network-namespace entries under /proc/sys/net/core. Non-init
 * namespaces get a kmemdup()'d copy with .data repointed at their own
 * sysctl_somaxconn (see sysctl_core_net_init()).
 */
static struct ctl_table netns_core_table[] = {
	{
		.procname	= "somaxconn",
		.data		= &init_net.core.sysctl_somaxconn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{ }
};
  318. static __net_init int sysctl_core_net_init(struct net *net)
  319. {
  320. struct ctl_table *tbl;
  321. net->core.sysctl_somaxconn = SOMAXCONN;
  322. tbl = netns_core_table;
  323. if (!net_eq(net, &init_net)) {
  324. tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
  325. if (tbl == NULL)
  326. goto err_dup;
  327. tbl[0].data = &net->core.sysctl_somaxconn;
  328. /* Don't export any sysctls to unprivileged users */
  329. if (net->user_ns != &init_user_ns) {
  330. tbl[0].procname = NULL;
  331. }
  332. }
  333. net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
  334. if (net->core.sysctl_hdr == NULL)
  335. goto err_reg;
  336. return 0;
  337. err_reg:
  338. if (tbl != netns_core_table)
  339. kfree(tbl);
  340. err_dup:
  341. return -ENOMEM;
  342. }
  343. static __net_exit void sysctl_core_net_exit(struct net *net)
  344. {
  345. struct ctl_table *tbl;
  346. tbl = net->core.sysctl_hdr->ctl_table_arg;
  347. unregister_net_sysctl_table(net->core.sysctl_hdr);
  348. BUG_ON(tbl == netns_core_table);
  349. kfree(tbl);
  350. }
/* Pernet hooks wiring the per-namespace net/core sysctl lifecycle. */
static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};
/*
 * Boot-time registration of the global net/core sysctls and of the
 * pernet operations that manage the namespaced ones.
 *
 * NOTE(review): the return value of register_net_sysctl() is ignored
 * here — presumably acceptable at boot, but worth confirming that a
 * failure should not abort initialization.
 */
static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);