net_namespace.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594
  1. #include <linux/workqueue.h>
  2. #include <linux/rtnetlink.h>
  3. #include <linux/cache.h>
  4. #include <linux/slab.h>
  5. #include <linux/list.h>
  6. #include <linux/delay.h>
  7. #include <linux/sched.h>
  8. #include <linux/idr.h>
  9. #include <linux/rculist.h>
  10. #include <linux/nsproxy.h>
  11. #include <linux/netdevice.h>
  12. #include <net/net_namespace.h>
  13. #include <net/netns/generic.h>
  14. #include <net/rtnetlink.h>
  15. /*
  16. * Our network namespace constructor/destructor lists
  17. */
  18. static LIST_HEAD(pernet_list);
  19. static struct list_head *first_device = &pernet_list;
  20. static DEFINE_MUTEX(net_mutex);
  21. LIST_HEAD(net_namespace_list);
  22. EXPORT_SYMBOL_GPL(net_namespace_list);
  23. struct net init_net;
  24. EXPORT_SYMBOL(init_net);
  25. #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
  26. static void unregister_netdevices(struct net *net, struct list_head *list)
  27. {
  28. struct net_device *dev;
  29. /* At exit all network devices most be removed from a network
  30. * namespace. Do this in the reverse order of registeration.
  31. */
  32. for_each_netdev_reverse(net, dev) {
  33. if (dev->rtnl_link_ops)
  34. dev->rtnl_link_ops->dellink(dev, list);
  35. else
  36. unregister_netdevice_queue(dev, list);
  37. }
  38. }
  39. static int ops_init(const struct pernet_operations *ops, struct net *net)
  40. {
  41. int err;
  42. if (ops->id && ops->size) {
  43. void *data = kzalloc(ops->size, GFP_KERNEL);
  44. if (!data)
  45. return -ENOMEM;
  46. err = net_assign_generic(net, *ops->id, data);
  47. if (err) {
  48. kfree(data);
  49. return err;
  50. }
  51. }
  52. if (ops->init)
  53. return ops->init(net);
  54. return 0;
  55. }
  56. static void ops_free(const struct pernet_operations *ops, struct net *net)
  57. {
  58. if (ops->id && ops->size) {
  59. int id = *ops->id;
  60. kfree(net_generic(net, id));
  61. }
  62. }
  63. static void ops_exit_list(const struct pernet_operations *ops,
  64. struct list_head *net_exit_list)
  65. {
  66. struct net *net;
  67. if (ops->exit) {
  68. list_for_each_entry(net, net_exit_list, exit_list)
  69. ops->exit(net);
  70. }
  71. if (&ops->list == first_device) {
  72. LIST_HEAD(dev_kill_list);
  73. rtnl_lock();
  74. list_for_each_entry(net, net_exit_list, exit_list)
  75. unregister_netdevices(net, &dev_kill_list);
  76. unregister_netdevice_many(&dev_kill_list);
  77. rtnl_unlock();
  78. }
  79. if (ops->exit_batch)
  80. ops->exit_batch(net_exit_list);
  81. }
  82. static void ops_free_list(const struct pernet_operations *ops,
  83. struct list_head *net_exit_list)
  84. {
  85. struct net *net;
  86. if (ops->size && ops->id) {
  87. list_for_each_entry(net, net_exit_list, exit_list)
  88. ops_free(ops, net);
  89. }
  90. }
  91. /*
  92. * setup_net runs the initializers for the network namespace object.
  93. */
  94. static __net_init int setup_net(struct net *net)
  95. {
  96. /* Must be called with net_mutex held */
  97. const struct pernet_operations *ops, *saved_ops;
  98. int error = 0;
  99. LIST_HEAD(net_exit_list);
  100. atomic_set(&net->count, 1);
  101. #ifdef NETNS_REFCNT_DEBUG
  102. atomic_set(&net->use_count, 0);
  103. #endif
  104. list_for_each_entry(ops, &pernet_list, list) {
  105. error = ops_init(ops, net);
  106. if (error < 0)
  107. goto out_undo;
  108. }
  109. out:
  110. return error;
  111. out_undo:
  112. /* Walk through the list backwards calling the exit functions
  113. * for the pernet modules whose init functions did not fail.
  114. */
  115. list_add(&net->exit_list, &net_exit_list);
  116. saved_ops = ops;
  117. list_for_each_entry_continue_reverse(ops, &pernet_list, list)
  118. ops_exit_list(ops, &net_exit_list);
  119. ops = saved_ops;
  120. list_for_each_entry_continue_reverse(ops, &pernet_list, list)
  121. ops_free_list(ops, &net_exit_list);
  122. rcu_barrier();
  123. goto out;
  124. }
  125. static struct net_generic *net_alloc_generic(void)
  126. {
  127. struct net_generic *ng;
  128. size_t generic_size = sizeof(struct net_generic) +
  129. INITIAL_NET_GEN_PTRS * sizeof(void *);
  130. ng = kzalloc(generic_size, GFP_KERNEL);
  131. if (ng)
  132. ng->len = INITIAL_NET_GEN_PTRS;
  133. return ng;
  134. }
  135. #ifdef CONFIG_NET_NS
  136. static struct kmem_cache *net_cachep;
  137. static struct workqueue_struct *netns_wq;
  138. static struct net *net_alloc(void)
  139. {
  140. struct net *net = NULL;
  141. struct net_generic *ng;
  142. ng = net_alloc_generic();
  143. if (!ng)
  144. goto out;
  145. net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
  146. if (!net)
  147. goto out_free;
  148. rcu_assign_pointer(net->gen, ng);
  149. out:
  150. return net;
  151. out_free:
  152. kfree(ng);
  153. goto out;
  154. }
  155. static void net_free(struct net *net)
  156. {
  157. #ifdef NETNS_REFCNT_DEBUG
  158. if (unlikely(atomic_read(&net->use_count) != 0)) {
  159. printk(KERN_EMERG "network namespace not free! Usage: %d\n",
  160. atomic_read(&net->use_count));
  161. return;
  162. }
  163. #endif
  164. kfree(net->gen);
  165. kmem_cache_free(net_cachep, net);
  166. }
  167. static struct net *net_create(void)
  168. {
  169. struct net *net;
  170. int rv;
  171. net = net_alloc();
  172. if (!net)
  173. return ERR_PTR(-ENOMEM);
  174. mutex_lock(&net_mutex);
  175. rv = setup_net(net);
  176. if (rv == 0) {
  177. rtnl_lock();
  178. list_add_tail_rcu(&net->list, &net_namespace_list);
  179. rtnl_unlock();
  180. }
  181. mutex_unlock(&net_mutex);
  182. if (rv < 0) {
  183. net_free(net);
  184. return ERR_PTR(rv);
  185. }
  186. return net;
  187. }
  188. struct net *copy_net_ns(unsigned long flags, struct net *old_net)
  189. {
  190. if (!(flags & CLONE_NEWNET))
  191. return get_net(old_net);
  192. return net_create();
  193. }
  194. static DEFINE_SPINLOCK(cleanup_list_lock);
  195. static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
  196. static void cleanup_net(struct work_struct *work)
  197. {
  198. const struct pernet_operations *ops;
  199. struct net *net, *tmp;
  200. LIST_HEAD(net_kill_list);
  201. LIST_HEAD(net_exit_list);
  202. /* Atomically snapshot the list of namespaces to cleanup */
  203. spin_lock_irq(&cleanup_list_lock);
  204. list_replace_init(&cleanup_list, &net_kill_list);
  205. spin_unlock_irq(&cleanup_list_lock);
  206. mutex_lock(&net_mutex);
  207. /* Don't let anyone else find us. */
  208. rtnl_lock();
  209. list_for_each_entry(net, &net_kill_list, cleanup_list) {
  210. list_del_rcu(&net->list);
  211. list_add_tail(&net->exit_list, &net_exit_list);
  212. }
  213. rtnl_unlock();
  214. /*
  215. * Another CPU might be rcu-iterating the list, wait for it.
  216. * This needs to be before calling the exit() notifiers, so
  217. * the rcu_barrier() below isn't sufficient alone.
  218. */
  219. synchronize_rcu();
  220. /* Run all of the network namespace exit methods */
  221. list_for_each_entry_reverse(ops, &pernet_list, list)
  222. ops_exit_list(ops, &net_exit_list);
  223. /* Free the net generic variables */
  224. list_for_each_entry_reverse(ops, &pernet_list, list)
  225. ops_free_list(ops, &net_exit_list);
  226. mutex_unlock(&net_mutex);
  227. /* Ensure there are no outstanding rcu callbacks using this
  228. * network namespace.
  229. */
  230. rcu_barrier();
  231. /* Finally it is safe to free my network namespace structure */
  232. list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
  233. list_del_init(&net->exit_list);
  234. net_free(net);
  235. }
  236. }
  237. static DECLARE_WORK(net_cleanup_work, cleanup_net);
  238. void __put_net(struct net *net)
  239. {
  240. /* Cleanup the network namespace in process context */
  241. unsigned long flags;
  242. spin_lock_irqsave(&cleanup_list_lock, flags);
  243. list_add(&net->cleanup_list, &cleanup_list);
  244. spin_unlock_irqrestore(&cleanup_list_lock, flags);
  245. queue_work(netns_wq, &net_cleanup_work);
  246. }
  247. EXPORT_SYMBOL_GPL(__put_net);
  248. #else
  249. struct net *copy_net_ns(unsigned long flags, struct net *old_net)
  250. {
  251. if (flags & CLONE_NEWNET)
  252. return ERR_PTR(-EINVAL);
  253. return old_net;
  254. }
  255. #endif
  256. struct net *get_net_ns_by_pid(pid_t pid)
  257. {
  258. struct task_struct *tsk;
  259. struct net *net;
  260. /* Lookup the network namespace */
  261. net = ERR_PTR(-ESRCH);
  262. rcu_read_lock();
  263. tsk = find_task_by_vpid(pid);
  264. if (tsk) {
  265. struct nsproxy *nsproxy;
  266. nsproxy = task_nsproxy(tsk);
  267. if (nsproxy)
  268. net = get_net(nsproxy->net_ns);
  269. }
  270. rcu_read_unlock();
  271. return net;
  272. }
  273. EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
  274. static int __init net_ns_init(void)
  275. {
  276. struct net_generic *ng;
  277. #ifdef CONFIG_NET_NS
  278. net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
  279. SMP_CACHE_BYTES,
  280. SLAB_PANIC, NULL);
  281. /* Create workqueue for cleanup */
  282. netns_wq = create_singlethread_workqueue("netns");
  283. if (!netns_wq)
  284. panic("Could not create netns workq");
  285. #endif
  286. ng = net_alloc_generic();
  287. if (!ng)
  288. panic("Could not allocate generic netns");
  289. rcu_assign_pointer(init_net.gen, ng);
  290. mutex_lock(&net_mutex);
  291. if (setup_net(&init_net))
  292. panic("Could not setup the initial network namespace");
  293. rtnl_lock();
  294. list_add_tail_rcu(&init_net.list, &net_namespace_list);
  295. rtnl_unlock();
  296. mutex_unlock(&net_mutex);
  297. return 0;
  298. }
  299. pure_initcall(net_ns_init);
  300. #ifdef CONFIG_NET_NS
  301. static int __register_pernet_operations(struct list_head *list,
  302. struct pernet_operations *ops)
  303. {
  304. struct net *net;
  305. int error;
  306. LIST_HEAD(net_exit_list);
  307. list_add_tail(&ops->list, list);
  308. if (ops->init || (ops->id && ops->size)) {
  309. for_each_net(net) {
  310. error = ops_init(ops, net);
  311. if (error)
  312. goto out_undo;
  313. list_add_tail(&net->exit_list, &net_exit_list);
  314. }
  315. }
  316. return 0;
  317. out_undo:
  318. /* If I have an error cleanup all namespaces I initialized */
  319. list_del(&ops->list);
  320. ops_exit_list(ops, &net_exit_list);
  321. ops_free_list(ops, &net_exit_list);
  322. return error;
  323. }
  324. static void __unregister_pernet_operations(struct pernet_operations *ops)
  325. {
  326. struct net *net;
  327. LIST_HEAD(net_exit_list);
  328. list_del(&ops->list);
  329. for_each_net(net)
  330. list_add_tail(&net->exit_list, &net_exit_list);
  331. ops_exit_list(ops, &net_exit_list);
  332. ops_free_list(ops, &net_exit_list);
  333. }
  334. #else
  335. static int __register_pernet_operations(struct list_head *list,
  336. struct pernet_operations *ops)
  337. {
  338. int err = 0;
  339. err = ops_init(ops, &init_net);
  340. if (err)
  341. ops_free(ops, &init_net);
  342. return err;
  343. }
  344. static void __unregister_pernet_operations(struct pernet_operations *ops)
  345. {
  346. LIST_HEAD(net_exit_list);
  347. list_add(&init_net.exit_list, &net_exit_list);
  348. ops_exit_list(ops, &net_exit_list);
  349. ops_free_list(ops, &net_exit_list);
  350. }
  351. #endif /* CONFIG_NET_NS */
  352. static DEFINE_IDA(net_generic_ids);
  353. static int register_pernet_operations(struct list_head *list,
  354. struct pernet_operations *ops)
  355. {
  356. int error;
  357. if (ops->id) {
  358. again:
  359. error = ida_get_new_above(&net_generic_ids, 1, ops->id);
  360. if (error < 0) {
  361. if (error == -EAGAIN) {
  362. ida_pre_get(&net_generic_ids, GFP_KERNEL);
  363. goto again;
  364. }
  365. return error;
  366. }
  367. }
  368. error = __register_pernet_operations(list, ops);
  369. if (error && ops->id)
  370. ida_remove(&net_generic_ids, *ops->id);
  371. return error;
  372. }
  373. static void unregister_pernet_operations(struct pernet_operations *ops)
  374. {
  375. __unregister_pernet_operations(ops);
  376. if (ops->id)
  377. ida_remove(&net_generic_ids, *ops->id);
  378. }
  379. /**
  380. * register_pernet_subsys - register a network namespace subsystem
  381. * @ops: pernet operations structure for the subsystem
  382. *
  383. * Register a subsystem which has init and exit functions
  384. * that are called when network namespaces are created and
  385. * destroyed respectively.
  386. *
  387. * When registered all network namespace init functions are
  388. * called for every existing network namespace. Allowing kernel
  389. * modules to have a race free view of the set of network namespaces.
  390. *
  391. * When a new network namespace is created all of the init
  392. * methods are called in the order in which they were registered.
  393. *
  394. * When a network namespace is destroyed all of the exit methods
  395. * are called in the reverse of the order with which they were
  396. * registered.
  397. */
  398. int register_pernet_subsys(struct pernet_operations *ops)
  399. {
  400. int error;
  401. mutex_lock(&net_mutex);
  402. error = register_pernet_operations(first_device, ops);
  403. mutex_unlock(&net_mutex);
  404. return error;
  405. }
  406. EXPORT_SYMBOL_GPL(register_pernet_subsys);
  407. /**
  408. * unregister_pernet_subsys - unregister a network namespace subsystem
  409. * @ops: pernet operations structure to manipulate
  410. *
  411. * Remove the pernet operations structure from the list to be
  412. * used when network namespaces are created or destroyed. In
  413. * addition run the exit method for all existing network
  414. * namespaces.
  415. */
  416. void unregister_pernet_subsys(struct pernet_operations *module)
  417. {
  418. mutex_lock(&net_mutex);
  419. unregister_pernet_operations(module);
  420. mutex_unlock(&net_mutex);
  421. }
  422. EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
  423. /**
  424. * register_pernet_device - register a network namespace device
  425. * @ops: pernet operations structure for the subsystem
  426. *
  427. * Register a device which has init and exit functions
  428. * that are called when network namespaces are created and
  429. * destroyed respectively.
  430. *
  431. * When registered all network namespace init functions are
  432. * called for every existing network namespace. Allowing kernel
  433. * modules to have a race free view of the set of network namespaces.
  434. *
  435. * When a new network namespace is created all of the init
  436. * methods are called in the order in which they were registered.
  437. *
  438. * When a network namespace is destroyed all of the exit methods
  439. * are called in the reverse of the order with which they were
  440. * registered.
  441. */
  442. int register_pernet_device(struct pernet_operations *ops)
  443. {
  444. int error;
  445. mutex_lock(&net_mutex);
  446. error = register_pernet_operations(&pernet_list, ops);
  447. if (!error && (first_device == &pernet_list))
  448. first_device = &ops->list;
  449. mutex_unlock(&net_mutex);
  450. return error;
  451. }
  452. EXPORT_SYMBOL_GPL(register_pernet_device);
  453. /**
  454. * unregister_pernet_device - unregister a network namespace netdevice
  455. * @ops: pernet operations structure to manipulate
  456. *
  457. * Remove the pernet operations structure from the list to be
  458. * used when network namespaces are created or destroyed. In
  459. * addition run the exit method for all existing network
  460. * namespaces.
  461. */
  462. void unregister_pernet_device(struct pernet_operations *ops)
  463. {
  464. mutex_lock(&net_mutex);
  465. if (&ops->list == first_device)
  466. first_device = first_device->next;
  467. unregister_pernet_operations(ops);
  468. mutex_unlock(&net_mutex);
  469. }
  470. EXPORT_SYMBOL_GPL(unregister_pernet_device);
  471. static void net_generic_release(struct rcu_head *rcu)
  472. {
  473. struct net_generic *ng;
  474. ng = container_of(rcu, struct net_generic, rcu);
  475. kfree(ng);
  476. }
  477. int net_assign_generic(struct net *net, int id, void *data)
  478. {
  479. struct net_generic *ng, *old_ng;
  480. BUG_ON(!mutex_is_locked(&net_mutex));
  481. BUG_ON(id == 0);
  482. ng = old_ng = net->gen;
  483. if (old_ng->len >= id)
  484. goto assign;
  485. ng = kzalloc(sizeof(struct net_generic) +
  486. id * sizeof(void *), GFP_KERNEL);
  487. if (ng == NULL)
  488. return -ENOMEM;
  489. /*
  490. * Some synchronisation notes:
  491. *
  492. * The net_generic explores the net->gen array inside rcu
  493. * read section. Besides once set the net->gen->ptr[x]
  494. * pointer never changes (see rules in netns/generic.h).
  495. *
  496. * That said, we simply duplicate this array and schedule
  497. * the old copy for kfree after a grace period.
  498. */
  499. ng->len = id;
  500. memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
  501. rcu_assign_pointer(net->gen, ng);
  502. call_rcu(&old_ng->rcu, net_generic_release);
  503. assign:
  504. ng->ptr[id - 1] = data;
  505. return 0;
  506. }
  507. EXPORT_SYMBOL_GPL(net_assign_generic);