net_namespace.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. #include <linux/workqueue.h>
  2. #include <linux/rtnetlink.h>
  3. #include <linux/cache.h>
  4. #include <linux/slab.h>
  5. #include <linux/list.h>
  6. #include <linux/delay.h>
  7. #include <linux/sched.h>
  8. #include <linux/idr.h>
  9. #include <linux/rculist.h>
  10. #include <linux/nsproxy.h>
  11. #include <net/net_namespace.h>
  12. #include <net/netns/generic.h>
  13. /*
  14. * Our network namespace constructor/destructor lists
  15. */
  16. static LIST_HEAD(pernet_list);
  17. static struct list_head *first_device = &pernet_list;
  18. static DEFINE_MUTEX(net_mutex);
  19. LIST_HEAD(net_namespace_list);
  20. EXPORT_SYMBOL_GPL(net_namespace_list);
  21. struct net init_net;
  22. EXPORT_SYMBOL(init_net);
  23. #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
  24. /*
  25. * setup_net runs the initializers for the network namespace object.
  26. */
  27. static __net_init int setup_net(struct net *net)
  28. {
  29. /* Must be called with net_mutex held */
  30. struct pernet_operations *ops;
  31. int error = 0;
  32. atomic_set(&net->count, 1);
  33. #ifdef NETNS_REFCNT_DEBUG
  34. atomic_set(&net->use_count, 0);
  35. #endif
  36. list_for_each_entry(ops, &pernet_list, list) {
  37. if (ops->init) {
  38. error = ops->init(net);
  39. if (error < 0)
  40. goto out_undo;
  41. }
  42. }
  43. out:
  44. return error;
  45. out_undo:
  46. /* Walk through the list backwards calling the exit functions
  47. * for the pernet modules whose init functions did not fail.
  48. */
  49. list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
  50. if (ops->exit)
  51. ops->exit(net);
  52. }
  53. rcu_barrier();
  54. goto out;
  55. }
  56. static struct net_generic *net_alloc_generic(void)
  57. {
  58. struct net_generic *ng;
  59. size_t generic_size = sizeof(struct net_generic) +
  60. INITIAL_NET_GEN_PTRS * sizeof(void *);
  61. ng = kzalloc(generic_size, GFP_KERNEL);
  62. if (ng)
  63. ng->len = INITIAL_NET_GEN_PTRS;
  64. return ng;
  65. }
  66. #ifdef CONFIG_NET_NS
  /* Workqueue on which __put_net() queues the namespace cleanup work. */
  static struct workqueue_struct *netns_wq;
  /* Slab cache that struct net objects are allocated from and freed to. */
  static struct kmem_cache *net_cachep;
  69. static struct net *net_alloc(void)
  70. {
  71. struct net *net = NULL;
  72. struct net_generic *ng;
  73. ng = net_alloc_generic();
  74. if (!ng)
  75. goto out;
  76. net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
  77. if (!net)
  78. goto out_free;
  79. rcu_assign_pointer(net->gen, ng);
  80. out:
  81. return net;
  82. out_free:
  83. kfree(ng);
  84. goto out;
  85. }
  86. static void net_free(struct net *net)
  87. {
  88. #ifdef NETNS_REFCNT_DEBUG
  89. if (unlikely(atomic_read(&net->use_count) != 0)) {
  90. printk(KERN_EMERG "network namespace not free! Usage: %d\n",
  91. atomic_read(&net->use_count));
  92. return;
  93. }
  94. #endif
  95. kfree(net->gen);
  96. kmem_cache_free(net_cachep, net);
  97. }
  98. static struct net *net_create(void)
  99. {
  100. struct net *net;
  101. int rv;
  102. net = net_alloc();
  103. if (!net)
  104. return ERR_PTR(-ENOMEM);
  105. mutex_lock(&net_mutex);
  106. rv = setup_net(net);
  107. if (rv == 0) {
  108. rtnl_lock();
  109. list_add_tail_rcu(&net->list, &net_namespace_list);
  110. rtnl_unlock();
  111. }
  112. mutex_unlock(&net_mutex);
  113. if (rv < 0) {
  114. net_free(net);
  115. return ERR_PTR(rv);
  116. }
  117. return net;
  118. }
  119. struct net *copy_net_ns(unsigned long flags, struct net *old_net)
  120. {
  121. if (!(flags & CLONE_NEWNET))
  122. return get_net(old_net);
  123. return net_create();
  124. }
  125. static void cleanup_net(struct work_struct *work)
  126. {
  127. struct pernet_operations *ops;
  128. struct net *net;
  129. net = container_of(work, struct net, work);
  130. mutex_lock(&net_mutex);
  131. /* Don't let anyone else find us. */
  132. rtnl_lock();
  133. list_del_rcu(&net->list);
  134. rtnl_unlock();
  135. /*
  136. * Another CPU might be rcu-iterating the list, wait for it.
  137. * This needs to be before calling the exit() notifiers, so
  138. * the rcu_barrier() below isn't sufficient alone.
  139. */
  140. synchronize_rcu();
  141. /* Run all of the network namespace exit methods */
  142. list_for_each_entry_reverse(ops, &pernet_list, list) {
  143. if (ops->exit)
  144. ops->exit(net);
  145. }
  146. mutex_unlock(&net_mutex);
  147. /* Ensure there are no outstanding rcu callbacks using this
  148. * network namespace.
  149. */
  150. rcu_barrier();
  151. /* Finally it is safe to free my network namespace structure */
  152. net_free(net);
  153. }
  154. void __put_net(struct net *net)
  155. {
  156. /* Cleanup the network namespace in process context */
  157. INIT_WORK(&net->work, cleanup_net);
  158. queue_work(netns_wq, &net->work);
  159. }
  160. EXPORT_SYMBOL_GPL(__put_net);
  161. #else
  162. struct net *copy_net_ns(unsigned long flags, struct net *old_net)
  163. {
  164. if (flags & CLONE_NEWNET)
  165. return ERR_PTR(-EINVAL);
  166. return old_net;
  167. }
  168. #endif
  169. struct net *get_net_ns_by_pid(pid_t pid)
  170. {
  171. struct task_struct *tsk;
  172. struct net *net;
  173. /* Lookup the network namespace */
  174. net = ERR_PTR(-ESRCH);
  175. rcu_read_lock();
  176. tsk = find_task_by_vpid(pid);
  177. if (tsk) {
  178. struct nsproxy *nsproxy;
  179. nsproxy = task_nsproxy(tsk);
  180. if (nsproxy)
  181. net = get_net(nsproxy->net_ns);
  182. }
  183. rcu_read_unlock();
  184. return net;
  185. }
  186. EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
  187. static int __init net_ns_init(void)
  188. {
  189. struct net_generic *ng;
  190. #ifdef CONFIG_NET_NS
  191. net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
  192. SMP_CACHE_BYTES,
  193. SLAB_PANIC, NULL);
  194. /* Create workqueue for cleanup */
  195. netns_wq = create_singlethread_workqueue("netns");
  196. if (!netns_wq)
  197. panic("Could not create netns workq");
  198. #endif
  199. ng = net_alloc_generic();
  200. if (!ng)
  201. panic("Could not allocate generic netns");
  202. rcu_assign_pointer(init_net.gen, ng);
  203. mutex_lock(&net_mutex);
  204. if (setup_net(&init_net))
  205. panic("Could not setup the initial network namespace");
  206. rtnl_lock();
  207. list_add_tail_rcu(&init_net.list, &net_namespace_list);
  208. rtnl_unlock();
  209. mutex_unlock(&net_mutex);
  210. return 0;
  211. }
  212. pure_initcall(net_ns_init);
  213. #ifdef CONFIG_NET_NS
  214. static int register_pernet_operations(struct list_head *list,
  215. struct pernet_operations *ops)
  216. {
  217. struct net *net, *undo_net;
  218. int error;
  219. list_add_tail(&ops->list, list);
  220. if (ops->init) {
  221. for_each_net(net) {
  222. error = ops->init(net);
  223. if (error)
  224. goto out_undo;
  225. }
  226. }
  227. return 0;
  228. out_undo:
  229. /* If I have an error cleanup all namespaces I initialized */
  230. list_del(&ops->list);
  231. if (ops->exit) {
  232. for_each_net(undo_net) {
  233. if (undo_net == net)
  234. goto undone;
  235. ops->exit(undo_net);
  236. }
  237. }
  238. undone:
  239. return error;
  240. }
  241. static void unregister_pernet_operations(struct pernet_operations *ops)
  242. {
  243. struct net *net;
  244. list_del(&ops->list);
  245. if (ops->exit)
  246. for_each_net(net)
  247. ops->exit(net);
  248. }
  249. #else
  250. static int register_pernet_operations(struct list_head *list,
  251. struct pernet_operations *ops)
  252. {
  253. if (ops->init == NULL)
  254. return 0;
  255. return ops->init(&init_net);
  256. }
  257. static void unregister_pernet_operations(struct pernet_operations *ops)
  258. {
  259. if (ops->exit)
  260. ops->exit(&init_net);
  261. }
  262. #endif
  /* Allocator for the ids handed out by the register_pernet_gen_*() calls. */
  static DEFINE_IDA(net_generic_ids);
  264. /**
  265. * register_pernet_subsys - register a network namespace subsystem
  266. * @ops: pernet operations structure for the subsystem
  267. *
  268. * Register a subsystem which has init and exit functions
  269. * that are called when network namespaces are created and
  270. * destroyed respectively.
  271. *
  272. * When registered all network namespace init functions are
  273. * called for every existing network namespace. Allowing kernel
  274. * modules to have a race free view of the set of network namespaces.
  275. *
  276. * When a new network namespace is created all of the init
  277. * methods are called in the order in which they were registered.
  278. *
  279. * When a network namespace is destroyed all of the exit methods
  280. * are called in the reverse of the order with which they were
  281. * registered.
  282. */
  283. int register_pernet_subsys(struct pernet_operations *ops)
  284. {
  285. int error;
  286. mutex_lock(&net_mutex);
  287. error = register_pernet_operations(first_device, ops);
  288. mutex_unlock(&net_mutex);
  289. return error;
  290. }
  291. EXPORT_SYMBOL_GPL(register_pernet_subsys);
  292. /**
  293. * unregister_pernet_subsys - unregister a network namespace subsystem
  294. * @ops: pernet operations structure to manipulate
  295. *
  296. * Remove the pernet operations structure from the list to be
  297. * used when network namespaces are created or destroyed. In
  298. * addition run the exit method for all existing network
  299. * namespaces.
  300. */
  301. void unregister_pernet_subsys(struct pernet_operations *module)
  302. {
  303. mutex_lock(&net_mutex);
  304. unregister_pernet_operations(module);
  305. mutex_unlock(&net_mutex);
  306. }
  307. EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
  308. int register_pernet_gen_subsys(int *id, struct pernet_operations *ops)
  309. {
  310. int rv;
  311. mutex_lock(&net_mutex);
  312. again:
  313. rv = ida_get_new_above(&net_generic_ids, 1, id);
  314. if (rv < 0) {
  315. if (rv == -EAGAIN) {
  316. ida_pre_get(&net_generic_ids, GFP_KERNEL);
  317. goto again;
  318. }
  319. goto out;
  320. }
  321. rv = register_pernet_operations(first_device, ops);
  322. if (rv < 0)
  323. ida_remove(&net_generic_ids, *id);
  324. out:
  325. mutex_unlock(&net_mutex);
  326. return rv;
  327. }
  328. EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);
  329. void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops)
  330. {
  331. mutex_lock(&net_mutex);
  332. unregister_pernet_operations(ops);
  333. ida_remove(&net_generic_ids, id);
  334. mutex_unlock(&net_mutex);
  335. }
  336. EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys);
  337. /**
  338. * register_pernet_device - register a network namespace device
  339. * @ops: pernet operations structure for the subsystem
  340. *
  341. * Register a device which has init and exit functions
  342. * that are called when network namespaces are created and
  343. * destroyed respectively.
  344. *
  345. * When registered all network namespace init functions are
  346. * called for every existing network namespace. Allowing kernel
  347. * modules to have a race free view of the set of network namespaces.
  348. *
  349. * When a new network namespace is created all of the init
  350. * methods are called in the order in which they were registered.
  351. *
  352. * When a network namespace is destroyed all of the exit methods
  353. * are called in the reverse of the order with which they were
  354. * registered.
  355. */
  356. int register_pernet_device(struct pernet_operations *ops)
  357. {
  358. int error;
  359. mutex_lock(&net_mutex);
  360. error = register_pernet_operations(&pernet_list, ops);
  361. if (!error && (first_device == &pernet_list))
  362. first_device = &ops->list;
  363. mutex_unlock(&net_mutex);
  364. return error;
  365. }
  366. EXPORT_SYMBOL_GPL(register_pernet_device);
  367. int register_pernet_gen_device(int *id, struct pernet_operations *ops)
  368. {
  369. int error;
  370. mutex_lock(&net_mutex);
  371. again:
  372. error = ida_get_new_above(&net_generic_ids, 1, id);
  373. if (error) {
  374. if (error == -EAGAIN) {
  375. ida_pre_get(&net_generic_ids, GFP_KERNEL);
  376. goto again;
  377. }
  378. goto out;
  379. }
  380. error = register_pernet_operations(&pernet_list, ops);
  381. if (error)
  382. ida_remove(&net_generic_ids, *id);
  383. else if (first_device == &pernet_list)
  384. first_device = &ops->list;
  385. out:
  386. mutex_unlock(&net_mutex);
  387. return error;
  388. }
  389. EXPORT_SYMBOL_GPL(register_pernet_gen_device);
  390. /**
  391. * unregister_pernet_device - unregister a network namespace netdevice
  392. * @ops: pernet operations structure to manipulate
  393. *
  394. * Remove the pernet operations structure from the list to be
  395. * used when network namespaces are created or destroyed. In
  396. * addition run the exit method for all existing network
  397. * namespaces.
  398. */
  399. void unregister_pernet_device(struct pernet_operations *ops)
  400. {
  401. mutex_lock(&net_mutex);
  402. if (&ops->list == first_device)
  403. first_device = first_device->next;
  404. unregister_pernet_operations(ops);
  405. mutex_unlock(&net_mutex);
  406. }
  407. EXPORT_SYMBOL_GPL(unregister_pernet_device);
  408. void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
  409. {
  410. mutex_lock(&net_mutex);
  411. if (&ops->list == first_device)
  412. first_device = first_device->next;
  413. unregister_pernet_operations(ops);
  414. ida_remove(&net_generic_ids, id);
  415. mutex_unlock(&net_mutex);
  416. }
  417. EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
  418. static void net_generic_release(struct rcu_head *rcu)
  419. {
  420. struct net_generic *ng;
  421. ng = container_of(rcu, struct net_generic, rcu);
  422. kfree(ng);
  423. }
  424. int net_assign_generic(struct net *net, int id, void *data)
  425. {
  426. struct net_generic *ng, *old_ng;
  427. BUG_ON(!mutex_is_locked(&net_mutex));
  428. BUG_ON(id == 0);
  429. ng = old_ng = net->gen;
  430. if (old_ng->len >= id)
  431. goto assign;
  432. ng = kzalloc(sizeof(struct net_generic) +
  433. id * sizeof(void *), GFP_KERNEL);
  434. if (ng == NULL)
  435. return -ENOMEM;
  436. /*
  437. * Some synchronisation notes:
  438. *
  439. * The net_generic explores the net->gen array inside rcu
  440. * read section. Besides once set the net->gen->ptr[x]
  441. * pointer never changes (see rules in netns/generic.h).
  442. *
  443. * That said, we simply duplicate this array and schedule
  444. * the old copy for kfree after a grace period.
  445. */
  446. ng->len = id;
  447. memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
  448. rcu_assign_pointer(net->gen, ng);
  449. call_rcu(&old_ng->rcu, net_generic_release);
  450. assign:
  451. ng->ptr[id - 1] = data;
  452. return 0;
  453. }
  454. EXPORT_SYMBOL_GPL(net_assign_generic);