udp_ipv4.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		UDP for IPv4.
 *
 *		For full credits, see net/ipv4/udp.c.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
  15. #include <asm/system.h>
  16. #include <asm/uaccess.h>
  17. #include <asm/ioctls.h>
  18. #include <linux/bootmem.h>
  19. #include <linux/types.h>
  20. #include <linux/fcntl.h>
  21. #include <linux/module.h>
  22. #include <linux/socket.h>
  23. #include <linux/sockios.h>
  24. #include <linux/igmp.h>
  25. #include <linux/in.h>
  26. #include <linux/errno.h>
  27. #include <linux/timer.h>
  28. #include <linux/mm.h>
  29. #include <linux/inet.h>
  30. #include <linux/netdevice.h>
  31. #include <net/tcp_states.h>
  32. #include <linux/skbuff.h>
  33. #include <linux/proc_fs.h>
  34. #include <linux/seq_file.h>
  35. #include <net/net_namespace.h>
  36. #include <net/icmp.h>
  37. #include <net/route.h>
  38. #include <net/checksum.h>
  39. #include <net/xfrm.h>
  40. #include "udp_impl.h"
  41. int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
  42. {
  43. struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
  44. return ( !ipv6_only_sock(sk2) &&
  45. (!inet1->rcv_saddr || !inet2->rcv_saddr ||
  46. inet1->rcv_saddr == inet2->rcv_saddr ));
  47. }
  48. static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
  49. {
  50. return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
  51. }
  52. /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
  53. * harder than this. -DaveM
  54. */
  55. static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
  56. __be16 sport, __be32 daddr, __be16 dport,
  57. int dif, struct hlist_head udptable[])
  58. {
  59. struct sock *sk, *result = NULL;
  60. struct hlist_node *node;
  61. unsigned short hnum = ntohs(dport);
  62. int badness = -1;
  63. read_lock(&udp_hash_lock);
  64. sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
  65. struct inet_sock *inet = inet_sk(sk);
  66. if (sk->sk_net == net && sk->sk_hash == hnum &&
  67. !ipv6_only_sock(sk)) {
  68. int score = (sk->sk_family == PF_INET ? 1 : 0);
  69. if (inet->rcv_saddr) {
  70. if (inet->rcv_saddr != daddr)
  71. continue;
  72. score+=2;
  73. }
  74. if (inet->daddr) {
  75. if (inet->daddr != saddr)
  76. continue;
  77. score+=2;
  78. }
  79. if (inet->dport) {
  80. if (inet->dport != sport)
  81. continue;
  82. score+=2;
  83. }
  84. if (sk->sk_bound_dev_if) {
  85. if (sk->sk_bound_dev_if != dif)
  86. continue;
  87. score+=2;
  88. }
  89. if (score == 9) {
  90. result = sk;
  91. break;
  92. } else if (score > badness) {
  93. result = sk;
  94. badness = score;
  95. }
  96. }
  97. }
  98. if (result)
  99. sock_hold(result);
  100. read_unlock(&udp_hash_lock);
  101. return result;
  102. }
  103. static inline struct sock *udp_v4_mcast_next(struct sock *sk,
  104. __be16 loc_port, __be32 loc_addr,
  105. __be16 rmt_port, __be32 rmt_addr,
  106. int dif)
  107. {
  108. struct hlist_node *node;
  109. struct sock *s = sk;
  110. unsigned short hnum = ntohs(loc_port);
  111. sk_for_each_from(s, node) {
  112. struct inet_sock *inet = inet_sk(s);
  113. if (s->sk_hash != hnum ||
  114. (inet->daddr && inet->daddr != rmt_addr) ||
  115. (inet->dport != rmt_port && inet->dport) ||
  116. (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
  117. ipv6_only_sock(s) ||
  118. (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
  119. continue;
  120. if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
  121. continue;
  122. goto found;
  123. }
  124. s = NULL;
  125. found:
  126. return s;
  127. }
  128. /*
  129. * This routine is called by the ICMP module when it gets some
  130. * sort of error condition. If err < 0 then the socket should
  131. * be closed and the error returned to the user. If err > 0
  132. * it's just the icmp type << 8 | icmp code.
  133. * Header points to the ip header of the error packet. We move
  134. * on past this. Then (as it used to claim before adjustment)
  135. * header points to the first 8 bytes of the udp header. We need
  136. * to find the appropriate port.
  137. */
  138. void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
  139. {
  140. struct inet_sock *inet;
  141. struct iphdr *iph = (struct iphdr*)skb->data;
  142. struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
  143. const int type = icmp_hdr(skb)->type;
  144. const int code = icmp_hdr(skb)->code;
  145. struct sock *sk;
  146. int harderr;
  147. int err;
  148. sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
  149. iph->saddr, uh->source, skb->dev->ifindex, udptable);
  150. if (sk == NULL) {
  151. ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
  152. return; /* No socket for error */
  153. }
  154. err = 0;
  155. harderr = 0;
  156. inet = inet_sk(sk);
  157. switch (type) {
  158. default:
  159. case ICMP_TIME_EXCEEDED:
  160. err = EHOSTUNREACH;
  161. break;
  162. case ICMP_SOURCE_QUENCH:
  163. goto out;
  164. case ICMP_PARAMETERPROB:
  165. err = EPROTO;
  166. harderr = 1;
  167. break;
  168. case ICMP_DEST_UNREACH:
  169. if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
  170. if (inet->pmtudisc != IP_PMTUDISC_DONT) {
  171. err = EMSGSIZE;
  172. harderr = 1;
  173. break;
  174. }
  175. goto out;
  176. }
  177. err = EHOSTUNREACH;
  178. if (code <= NR_ICMP_UNREACH) {
  179. harderr = icmp_err_convert[code].fatal;
  180. err = icmp_err_convert[code].errno;
  181. }
  182. break;
  183. }
  184. /*
  185. * RFC1122: OK. Passes ICMP errors back to application, as per
  186. * 4.1.3.3.
  187. */
  188. if (!inet->recverr) {
  189. if (!harderr || sk->sk_state != TCP_ESTABLISHED)
  190. goto out;
  191. } else {
  192. ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
  193. }
  194. sk->sk_err = err;
  195. sk->sk_error_report(sk);
  196. out:
  197. sock_put(sk);
  198. }
  199. void udp_err(struct sk_buff *skb, u32 info)
  200. {
  201. __udp4_lib_err(skb, info, udp_hash);
  202. }
  203. /*
  204. * Throw away all pending data and cancel the corking. Socket is locked.
  205. */
  206. static void udp_flush_pending_frames(struct sock *sk)
  207. {
  208. struct udp_sock *up = udp_sk(sk);
  209. if (up->pending) {
  210. up->len = 0;
  211. up->pending = 0;
  212. ip_flush_pending_frames(sk);
  213. }
  214. }
  215. /**
  216. * udp4_hwcsum_outgoing - handle outgoing HW checksumming
  217. * @sk: socket we are sending on
  218. * @skb: sk_buff containing the filled-in UDP header
  219. * (checksum field must be zeroed out)
  220. */
  221. static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
  222. __be32 src, __be32 dst, int len )
  223. {
  224. unsigned int offset;
  225. struct udphdr *uh = udp_hdr(skb);
  226. __wsum csum = 0;
  227. if (skb_queue_len(&sk->sk_write_queue) == 1) {
  228. /*
  229. * Only one fragment on the socket.
  230. */
  231. skb->csum_start = skb_transport_header(skb) - skb->head;
  232. skb->csum_offset = offsetof(struct udphdr, check);
  233. uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
  234. } else {
  235. /*
  236. * HW-checksum won't work as there are two or more
  237. * fragments on the socket so that all csums of sk_buffs
  238. * should be together
  239. */
  240. offset = skb_transport_offset(skb);
  241. skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
  242. skb->ip_summed = CHECKSUM_NONE;
  243. skb_queue_walk(&sk->sk_write_queue, skb) {
  244. csum = csum_add(csum, skb->csum);
  245. }
  246. uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
  247. if (uh->check == 0)
  248. uh->check = CSUM_MANGLED_0;
  249. }
  250. }
  251. /*
  252. * Push out all pending data as one UDP datagram. Socket is locked.
  253. */
  254. static int udp_push_pending_frames(struct sock *sk)
  255. {
  256. struct udp_sock *up = udp_sk(sk);
  257. struct inet_sock *inet = inet_sk(sk);
  258. struct flowi *fl = &inet->cork.fl;
  259. struct sk_buff *skb;
  260. struct udphdr *uh;
  261. int err = 0;
  262. int is_udplite = IS_UDPLITE(sk);
  263. __wsum csum = 0;
  264. /* Grab the skbuff where UDP header space exists. */
  265. if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
  266. goto out;
  267. /*
  268. * Create a UDP header
  269. */
  270. uh = udp_hdr(skb);
  271. uh->source = fl->fl_ip_sport;
  272. uh->dest = fl->fl_ip_dport;
  273. uh->len = htons(up->len);
  274. uh->check = 0;
  275. if (is_udplite) /* UDP-Lite */
  276. csum = udplite_csum_outgoing(sk, skb);
  277. else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
  278. skb->ip_summed = CHECKSUM_NONE;
  279. goto send;
  280. } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
  281. udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
  282. goto send;
  283. } else /* `normal' UDP */
  284. csum = udp_csum_outgoing(sk, skb);
  285. /* add protocol-dependent pseudo-header */
  286. uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
  287. sk->sk_protocol, csum );
  288. if (uh->check == 0)
  289. uh->check = CSUM_MANGLED_0;
  290. send:
  291. err = ip_push_pending_frames(sk);
  292. out:
  293. up->len = 0;
  294. up->pending = 0;
  295. if (!err)
  296. UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
  297. return err;
  298. }
  299. int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
  300. size_t len)
  301. {
  302. struct inet_sock *inet = inet_sk(sk);
  303. struct udp_sock *up = udp_sk(sk);
  304. int ulen = len;
  305. struct ipcm_cookie ipc;
  306. struct rtable *rt = NULL;
  307. int free = 0;
  308. int connected = 0;
  309. __be32 daddr, faddr, saddr;
  310. __be16 dport;
  311. u8 tos;
  312. int err, is_udplite = IS_UDPLITE(sk);
  313. int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
  314. int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
  315. if (len > 0xFFFF)
  316. return -EMSGSIZE;
  317. /*
  318. * Check the flags.
  319. */
  320. if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
  321. return -EOPNOTSUPP;
  322. ipc.opt = NULL;
  323. if (up->pending) {
  324. /*
  325. * There are pending frames.
  326. * The socket lock must be held while it's corked.
  327. */
  328. lock_sock(sk);
  329. if (likely(up->pending)) {
  330. if (unlikely(up->pending != AF_INET)) {
  331. release_sock(sk);
  332. return -EINVAL;
  333. }
  334. goto do_append_data;
  335. }
  336. release_sock(sk);
  337. }
  338. ulen += sizeof(struct udphdr);
  339. /*
  340. * Get and verify the address.
  341. */
  342. if (msg->msg_name) {
  343. struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
  344. if (msg->msg_namelen < sizeof(*usin))
  345. return -EINVAL;
  346. if (usin->sin_family != AF_INET) {
  347. if (usin->sin_family != AF_UNSPEC)
  348. return -EAFNOSUPPORT;
  349. }
  350. daddr = usin->sin_addr.s_addr;
  351. dport = usin->sin_port;
  352. if (dport == 0)
  353. return -EINVAL;
  354. } else {
  355. if (sk->sk_state != TCP_ESTABLISHED)
  356. return -EDESTADDRREQ;
  357. daddr = inet->daddr;
  358. dport = inet->dport;
  359. /* Open fast path for connected socket.
  360. Route will not be used, if at least one option is set.
  361. */
  362. connected = 1;
  363. }
  364. ipc.addr = inet->saddr;
  365. ipc.oif = sk->sk_bound_dev_if;
  366. if (msg->msg_controllen) {
  367. err = ip_cmsg_send(msg, &ipc);
  368. if (err)
  369. return err;
  370. if (ipc.opt)
  371. free = 1;
  372. connected = 0;
  373. }
  374. if (!ipc.opt)
  375. ipc.opt = inet->opt;
  376. saddr = ipc.addr;
  377. ipc.addr = faddr = daddr;
  378. if (ipc.opt && ipc.opt->srr) {
  379. if (!daddr)
  380. return -EINVAL;
  381. faddr = ipc.opt->faddr;
  382. connected = 0;
  383. }
  384. tos = RT_TOS(inet->tos);
  385. if (sock_flag(sk, SOCK_LOCALROUTE) ||
  386. (msg->msg_flags & MSG_DONTROUTE) ||
  387. (ipc.opt && ipc.opt->is_strictroute)) {
  388. tos |= RTO_ONLINK;
  389. connected = 0;
  390. }
  391. if (ipv4_is_multicast(daddr)) {
  392. if (!ipc.oif)
  393. ipc.oif = inet->mc_index;
  394. if (!saddr)
  395. saddr = inet->mc_addr;
  396. connected = 0;
  397. }
  398. if (connected)
  399. rt = (struct rtable*)sk_dst_check(sk, 0);
  400. if (rt == NULL) {
  401. struct flowi fl = { .oif = ipc.oif,
  402. .nl_u = { .ip4_u =
  403. { .daddr = faddr,
  404. .saddr = saddr,
  405. .tos = tos } },
  406. .proto = sk->sk_protocol,
  407. .uli_u = { .ports =
  408. { .sport = inet->sport,
  409. .dport = dport } } };
  410. security_sk_classify_flow(sk, &fl);
  411. err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
  412. if (err) {
  413. if (err == -ENETUNREACH)
  414. IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
  415. goto out;
  416. }
  417. err = -EACCES;
  418. if ((rt->rt_flags & RTCF_BROADCAST) &&
  419. !sock_flag(sk, SOCK_BROADCAST))
  420. goto out;
  421. if (connected)
  422. sk_dst_set(sk, dst_clone(&rt->u.dst));
  423. }
  424. if (msg->msg_flags&MSG_CONFIRM)
  425. goto do_confirm;
  426. back_from_confirm:
  427. saddr = rt->rt_src;
  428. if (!ipc.addr)
  429. daddr = ipc.addr = rt->rt_dst;
  430. lock_sock(sk);
  431. if (unlikely(up->pending)) {
  432. /* The socket is already corked while preparing it. */
  433. /* ... which is an evident application bug. --ANK */
  434. release_sock(sk);
  435. LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
  436. err = -EINVAL;
  437. goto out;
  438. }
  439. /*
  440. * Now cork the socket to pend data.
  441. */
  442. inet->cork.fl.fl4_dst = daddr;
  443. inet->cork.fl.fl_ip_dport = dport;
  444. inet->cork.fl.fl4_src = saddr;
  445. inet->cork.fl.fl_ip_sport = inet->sport;
  446. up->pending = AF_INET;
  447. do_append_data:
  448. up->len += ulen;
  449. getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
  450. err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
  451. sizeof(struct udphdr), &ipc, rt,
  452. corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
  453. if (err)
  454. udp_flush_pending_frames(sk);
  455. else if (!corkreq)
  456. err = udp_push_pending_frames(sk);
  457. else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
  458. up->pending = 0;
  459. release_sock(sk);
  460. out:
  461. ip_rt_put(rt);
  462. if (free)
  463. kfree(ipc.opt);
  464. if (!err)
  465. return len;
  466. /*
  467. * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
  468. * ENOBUFS might not be good (it's not tunable per se), but otherwise
  469. * we don't have a good statistic (IpOutDiscards but it can be too many
  470. * things). We could add another new stat but at least for now that
  471. * seems like overkill.
  472. */
  473. if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
  474. UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
  475. }
  476. return err;
  477. do_confirm:
  478. dst_confirm(&rt->u.dst);
  479. if (!(msg->msg_flags&MSG_PROBE) || len)
  480. goto back_from_confirm;
  481. err = 0;
  482. goto out;
  483. }
  484. int udp_sendpage(struct sock *sk, struct page *page, int offset,
  485. size_t size, int flags)
  486. {
  487. struct udp_sock *up = udp_sk(sk);
  488. int ret;
  489. if (!up->pending) {
  490. struct msghdr msg = { .msg_flags = flags|MSG_MORE };
  491. /* Call udp_sendmsg to specify destination address which
  492. * sendpage interface can't pass.
  493. * This will succeed only when the socket is connected.
  494. */
  495. ret = udp_sendmsg(NULL, sk, &msg, 0);
  496. if (ret < 0)
  497. return ret;
  498. }
  499. lock_sock(sk);
  500. if (unlikely(!up->pending)) {
  501. release_sock(sk);
  502. LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
  503. return -EINVAL;
  504. }
  505. ret = ip_append_page(sk, page, offset, size, flags);
  506. if (ret == -EOPNOTSUPP) {
  507. release_sock(sk);
  508. return sock_no_sendpage(sk->sk_socket, page, offset,
  509. size, flags);
  510. }
  511. if (ret < 0) {
  512. udp_flush_pending_frames(sk);
  513. goto out;
  514. }
  515. up->len += size;
  516. if (!(up->corkflag || (flags&MSG_MORE)))
  517. ret = udp_push_pending_frames(sk);
  518. if (!ret)
  519. ret = size;
  520. out:
  521. release_sock(sk);
  522. return ret;
  523. }
  524. /*
  525. * This should be easy, if there is something there we
  526. * return it, otherwise we block.
  527. */
  528. int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
  529. size_t len, int noblock, int flags, int *addr_len)
  530. {
  531. struct inet_sock *inet = inet_sk(sk);
  532. struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
  533. struct sk_buff *skb;
  534. unsigned int ulen, copied;
  535. int peeked;
  536. int err;
  537. int is_udplite = IS_UDPLITE(sk);
  538. /*
  539. * Check any passed addresses
  540. */
  541. if (addr_len)
  542. *addr_len=sizeof(*sin);
  543. if (flags & MSG_ERRQUEUE)
  544. return ip_recv_error(sk, msg, len);
  545. try_again:
  546. skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
  547. &peeked, &err);
  548. if (!skb)
  549. goto out;
  550. ulen = skb->len - sizeof(struct udphdr);
  551. copied = len;
  552. if (copied > ulen)
  553. copied = ulen;
  554. else if (copied < ulen)
  555. msg->msg_flags |= MSG_TRUNC;
  556. /*
  557. * If checksum is needed at all, try to do it while copying the
  558. * data. If the data is truncated, or if we only want a partial
  559. * coverage checksum (UDP-Lite), do it before the copy.
  560. */
  561. if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
  562. if (udp_lib_checksum_complete(skb))
  563. goto csum_copy_err;
  564. }
  565. if (skb_csum_unnecessary(skb))
  566. err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
  567. msg->msg_iov, copied );
  568. else {
  569. err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
  570. if (err == -EINVAL)
  571. goto csum_copy_err;
  572. }
  573. if (err)
  574. goto out_free;
  575. if (!peeked)
  576. UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
  577. sock_recv_timestamp(msg, sk, skb);
  578. /* Copy the address. */
  579. if (sin)
  580. {
  581. sin->sin_family = AF_INET;
  582. sin->sin_port = udp_hdr(skb)->source;
  583. sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
  584. memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
  585. }
  586. if (inet->cmsg_flags)
  587. ip_cmsg_recv(msg, skb);
  588. err = copied;
  589. if (flags & MSG_TRUNC)
  590. err = ulen;
  591. out_free:
  592. lock_sock(sk);
  593. skb_free_datagram(sk, skb);
  594. release_sock(sk);
  595. out:
  596. return err;
  597. csum_copy_err:
  598. lock_sock(sk);
  599. if (!skb_kill_datagram(sk, skb, flags))
  600. UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
  601. release_sock(sk);
  602. if (noblock)
  603. return -EAGAIN;
  604. goto try_again;
  605. }
  606. /* returns:
  607. * -1: error
  608. * 0: success
  609. * >0: "udp encap" protocol resubmission
  610. *
  611. * Note that in the success and error cases, the skb is assumed to
  612. * have either been requeued or freed.
  613. */
  614. int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
  615. {
  616. struct udp_sock *up = udp_sk(sk);
  617. int rc;
  618. int is_udplite = IS_UDPLITE(sk);
  619. /*
  620. * Charge it to the socket, dropping if the queue is full.
  621. */
  622. if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
  623. goto drop;
  624. nf_reset(skb);
  625. if (up->encap_type) {
  626. /*
  627. * This is an encapsulation socket so pass the skb to
  628. * the socket's udp_encap_rcv() hook. Otherwise, just
  629. * fall through and pass this up the UDP socket.
  630. * up->encap_rcv() returns the following value:
  631. * =0 if skb was successfully passed to the encap
  632. * handler or was discarded by it.
  633. * >0 if skb should be passed on to UDP.
  634. * <0 if skb should be resubmitted as proto -N
  635. */
  636. /* if we're overly short, let UDP handle it */
  637. if (skb->len > sizeof(struct udphdr) &&
  638. up->encap_rcv != NULL) {
  639. int ret;
  640. ret = (*up->encap_rcv)(sk, skb);
  641. if (ret <= 0) {
  642. UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
  643. is_udplite);
  644. return -ret;
  645. }
  646. }
  647. /* FALLTHROUGH -- it's a UDP Packet */
  648. }
  649. /*
  650. * UDP-Lite specific tests, ignored on UDP sockets
  651. */
  652. if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
  653. /*
  654. * MIB statistics other than incrementing the error count are
  655. * disabled for the following two types of errors: these depend
  656. * on the application settings, not on the functioning of the
  657. * protocol stack as such.
  658. *
  659. * RFC 3828 here recommends (sec 3.3): "There should also be a
  660. * way ... to ... at least let the receiving application block
  661. * delivery of packets with coverage values less than a value
  662. * provided by the application."
  663. */
  664. if (up->pcrlen == 0) { /* full coverage was set */
  665. LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
  666. "%d while full coverage %d requested\n",
  667. UDP_SKB_CB(skb)->cscov, skb->len);
  668. goto drop;
  669. }
  670. /* The next case involves violating the min. coverage requested
  671. * by the receiver. This is subtle: if receiver wants x and x is
  672. * greater than the buffersize/MTU then receiver will complain
  673. * that it wants x while sender emits packets of smaller size y.
  674. * Therefore the above ...()->partial_cov statement is essential.
  675. */
  676. if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
  677. LIMIT_NETDEBUG(KERN_WARNING
  678. "UDPLITE: coverage %d too small, need min %d\n",
  679. UDP_SKB_CB(skb)->cscov, up->pcrlen);
  680. goto drop;
  681. }
  682. }
  683. if (sk->sk_filter) {
  684. if (udp_lib_checksum_complete(skb))
  685. goto drop;
  686. }
  687. if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
  688. /* Note that an ENOMEM error is charged twice */
  689. if (rc == -ENOMEM)
  690. UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
  691. goto drop;
  692. }
  693. return 0;
  694. drop:
  695. UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
  696. kfree_skb(skb);
  697. return -1;
  698. }
  699. /*
  700. * Multicasts and broadcasts go to each listener.
  701. *
  702. * Note: called only from the BH handler context,
  703. * so we don't need to lock the hashes.
  704. */
  705. static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
  706. struct udphdr *uh,
  707. __be32 saddr, __be32 daddr,
  708. struct hlist_head udptable[])
  709. {
  710. struct sock *sk;
  711. int dif;
  712. read_lock(&udp_hash_lock);
  713. sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
  714. dif = skb->dev->ifindex;
  715. sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
  716. if (sk) {
  717. struct sock *sknext = NULL;
  718. do {
  719. struct sk_buff *skb1 = skb;
  720. sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
  721. uh->source, saddr, dif);
  722. if (sknext)
  723. skb1 = skb_clone(skb, GFP_ATOMIC);
  724. if (skb1) {
  725. int ret = 0;
  726. bh_lock_sock_nested(sk);
  727. if (!sock_owned_by_user(sk))
  728. ret = udp_queue_rcv_skb(sk, skb1);
  729. else
  730. sk_add_backlog(sk, skb1);
  731. bh_unlock_sock(sk);
  732. if (ret > 0)
  733. /* we should probably re-process instead
  734. * of dropping packets here. */
  735. kfree_skb(skb1);
  736. }
  737. sk = sknext;
  738. } while (sknext);
  739. } else
  740. kfree_skb(skb);
  741. read_unlock(&udp_hash_lock);
  742. return 0;
  743. }
  744. /* Initialize UDP checksum. If exited with zero value (success),
  745. * CHECKSUM_UNNECESSARY means, that no more checks are required.
  746. * Otherwise, csum completion requires chacksumming packet body,
  747. * including udp header and folding it to skb->csum.
  748. */
  749. static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
  750. int proto)
  751. {
  752. const struct iphdr *iph;
  753. int err;
  754. UDP_SKB_CB(skb)->partial_cov = 0;
  755. UDP_SKB_CB(skb)->cscov = skb->len;
  756. if (IS_PROTO_UDPLITE(proto)) {
  757. err = udplite_checksum_init(skb, uh);
  758. if (err)
  759. return err;
  760. }
  761. iph = ip_hdr(skb);
  762. if (uh->check == 0) {
  763. skb->ip_summed = CHECKSUM_UNNECESSARY;
  764. } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
  765. if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
  766. proto, skb->csum))
  767. skb->ip_summed = CHECKSUM_UNNECESSARY;
  768. }
  769. if (!skb_csum_unnecessary(skb))
  770. skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
  771. skb->len, proto, 0);
  772. /* Probably, we should checksum udp header (it should be in cache
  773. * in any case) and data in tiny packets (< rx copybreak).
  774. */
  775. return 0;
  776. }
  777. /*
  778. * All we need to do is get the socket, and then do a checksum.
  779. */
  780. int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
  781. int proto)
  782. {
  783. struct sock *sk;
  784. struct udphdr *uh = udp_hdr(skb);
  785. unsigned short ulen;
  786. struct rtable *rt = skb->rtable;
  787. __be32 saddr = ip_hdr(skb)->saddr;
  788. __be32 daddr = ip_hdr(skb)->daddr;
  789. /*
  790. * Validate the packet.
  791. */
  792. if (!pskb_may_pull(skb, sizeof(struct udphdr)))
  793. goto drop; /* No space for header. */
  794. ulen = ntohs(uh->len);
  795. if (ulen > skb->len)
  796. goto short_packet;
  797. if (IS_PROTO_UDPLITE(proto)) {
  798. /* UDP validates ulen. */
  799. if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
  800. goto short_packet;
  801. uh = udp_hdr(skb);
  802. }
  803. if (udp4_csum_init(skb, uh, proto))
  804. goto csum_error;
  805. if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
  806. return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
  807. sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
  808. uh->dest, inet_iif(skb), udptable);
  809. if (sk != NULL) {
  810. int ret = 0;
  811. bh_lock_sock_nested(sk);
  812. if (!sock_owned_by_user(sk))
  813. ret = udp_queue_rcv_skb(sk, skb);
  814. else
  815. sk_add_backlog(sk, skb);
  816. bh_unlock_sock(sk);
  817. sock_put(sk);
  818. /* a return value > 0 means to resubmit the input, but
  819. * it wants the return to be -protocol, or 0
  820. */
  821. if (ret > 0)
  822. return -ret;
  823. return 0;
  824. }
  825. if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
  826. goto drop;
  827. nf_reset(skb);
  828. /* No socket. Drop packet silently, if checksum is wrong */
  829. if (udp_lib_checksum_complete(skb))
  830. goto csum_error;
  831. UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto));
  832. icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
  833. /*
  834. * Hmm. We got an UDP packet to a port to which we
  835. * don't wanna listen. Ignore it.
  836. */
  837. kfree_skb(skb);
  838. return 0;
  839. short_packet:
  840. LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
  841. IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
  842. NIPQUAD(saddr),
  843. ntohs(uh->source),
  844. ulen,
  845. skb->len,
  846. NIPQUAD(daddr),
  847. ntohs(uh->dest));
  848. goto drop;
  849. csum_error:
  850. /*
  851. * RFC1122: OK. Discards the bad packet silently (as far as
  852. * the network is concerned, anyway) as per 4.1.3.4 (MUST).
  853. */
  854. LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
  855. IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
  856. NIPQUAD(saddr),
  857. ntohs(uh->source),
  858. NIPQUAD(daddr),
  859. ntohs(uh->dest),
  860. ulen);
  861. drop:
  862. UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto));
  863. kfree_skb(skb);
  864. return 0;
  865. }
  866. int udp_rcv(struct sk_buff *skb)
  867. {
  868. return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
  869. }
  /*
   * Destroy hook for UDP sockets (installed as udp_prot.destroy).
   *
   * Flushes the socket's pending frames while holding the socket lock.
   * Always returns 0.
   */
  int udp_destroy_sock(struct sock *sk)
  {
          lock_sock(sk);
          udp_flush_pending_frames(sk);
          release_sock(sk);

          return 0;
  }
  877. int udp_setsockopt(struct sock *sk, int level, int optname,
  878. char __user *optval, int optlen)
  879. {
  880. if (IS_SOL_UDPFAMILY(level))
  881. return udp_lib_setsockopt(sk, level, optname, optval, optlen,
  882. udp_push_pending_frames);
  883. return ip_setsockopt(sk, level, optname, optval, optlen);
  884. }
  #ifdef CONFIG_COMPAT
  /*
   * Compat variant of the UDP setsockopt() handler.
   *
   * UDP-family levels go to udp_lib_setsockopt(), exactly as in
   * udp_setsockopt(); all other levels are forwarded to
   * compat_ip_setsockopt(). Returns the selected handler's result.
   */
  int compat_udp_setsockopt(struct sock *sk, int level, int optname,
                            char __user *optval, int optlen)
  {
          /* Not a UDP-family level: hand over to the compat IP handler. */
          if (!IS_SOL_UDPFAMILY(level))
                  return compat_ip_setsockopt(sk, level, optname, optval,
                                              optlen);

          return udp_lib_setsockopt(sk, level, optname, optval, optlen,
                                    udp_push_pending_frames);
  }
  #endif
  895. int udp_getsockopt(struct sock *sk, int level, int optname,
  896. char __user *optval, int __user *optlen)
  897. {
  898. if (IS_SOL_UDPFAMILY(level))
  899. return udp_lib_getsockopt(sk, level, optname, optval, optlen);
  900. return ip_getsockopt(sk, level, optname, optval, optlen);
  901. }
  #ifdef CONFIG_COMPAT
  /*
   * Compat variant of the UDP getsockopt() handler.
   *
   * UDP-family levels go to udp_lib_getsockopt(); all other levels are
   * forwarded to compat_ip_getsockopt(). Returns the selected handler's
   * result.
   */
  int compat_udp_getsockopt(struct sock *sk, int level, int optname,
                            char __user *optval, int __user *optlen)
  {
          /* Not a UDP-family level: hand over to the compat IP handler. */
          if (!IS_SOL_UDPFAMILY(level))
                  return compat_ip_getsockopt(sk, level, optname, optval,
                                              optlen);

          return udp_lib_getsockopt(sk, level, optname, optval, optlen);
  }
  #endif
  911. /* ------------------------------------------------------------------------ */
  912. DEFINE_PROTO_INUSE(udp)
  913. struct proto udp_prot = {
  914. .name = "UDP",
  915. .owner = THIS_MODULE,
  916. .close = udp_lib_close,
  917. .connect = ip4_datagram_connect,
  918. .disconnect = udp_disconnect,
  919. .ioctl = udp_ioctl,
  920. .destroy = udp_destroy_sock,
  921. .setsockopt = udp_setsockopt,
  922. .getsockopt = udp_getsockopt,
  923. .sendmsg = udp_sendmsg,
  924. .recvmsg = udp_recvmsg,
  925. .sendpage = udp_sendpage,
  926. .backlog_rcv = udp_queue_rcv_skb,
  927. .hash = udp_lib_hash,
  928. .unhash = udp_lib_unhash,
  929. .get_port = udp_v4_get_port,
  930. .memory_allocated = &udp_memory_allocated,
  931. .sysctl_mem = sysctl_udp_mem,
  932. .sysctl_wmem = &sysctl_udp_wmem_min,
  933. .sysctl_rmem = &sysctl_udp_rmem_min,
  934. .obj_size = sizeof(struct udp_sock),
  935. #ifdef CONFIG_COMPAT
  936. .compat_setsockopt = compat_udp_setsockopt,
  937. .compat_getsockopt = compat_udp_getsockopt,
  938. #endif
  939. REF_PROTO_INUSE(udp)
  940. };
  941. /* ------------------------------------------------------------------------ */
  942. static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
  943. {
  944. struct inet_sock *inet = inet_sk(sp);
  945. __be32 dest = inet->daddr;
  946. __be32 src = inet->rcv_saddr;
  947. __u16 destp = ntohs(inet->dport);
  948. __u16 srcp = ntohs(inet->sport);
  949. sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
  950. " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
  951. bucket, src, srcp, dest, destp, sp->sk_state,
  952. atomic_read(&sp->sk_wmem_alloc),
  953. atomic_read(&sp->sk_rmem_alloc),
  954. 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
  955. atomic_read(&sp->sk_refcnt), sp);
  956. }
  957. int udp4_seq_show(struct seq_file *seq, void *v)
  958. {
  959. if (v == SEQ_START_TOKEN)
  960. seq_printf(seq, "%-127s\n",
  961. " sl local_address rem_address st tx_queue "
  962. "rx_queue tr tm->when retrnsmt uid timeout "
  963. "inode");
  964. else {
  965. char tmpbuf[129];
  966. struct udp_iter_state *state = seq->private;
  967. udp4_format_sock(v, tmpbuf, state->bucket);
  968. seq_printf(seq, "%-127s\n", tmpbuf);
  969. }
  970. return 0;
  971. }
  972. /* ------------------------------------------------------------------------ */
  #ifdef CONFIG_PROC_FS
  /* File operations object; only its address is used here, by udp4_seq_afinfo. */
  static struct file_operations udp4_seq_fops;

  /* Registration record for the IPv4 UDP seq_file entry named "udp". */
  static struct udp_seq_afinfo udp4_seq_afinfo = {
          .name      = "udp",
          .family    = AF_INET,
          .owner     = THIS_MODULE,
          .hashtable = udp_hash,
          .seq_fops  = &udp4_seq_fops,
          .seq_show  = udp4_seq_show,
  };

  /*
   * Register udp4_seq_afinfo through udp_proc_register().
   * Returns the result of that call.
   */
  int __init udp4_proc_init(void)
  {
          int err = udp_proc_register(&udp4_seq_afinfo);

          return err;
  }

  /* Undo udp4_proc_init(): unregister udp4_seq_afinfo. */
  void udp4_proc_exit(void)
  {
          udp_proc_unregister(&udp4_seq_afinfo);
  }
  #endif /* CONFIG_PROC_FS */
  992. EXPORT_SYMBOL(udp_prot);
  993. EXPORT_SYMBOL(udp_sendmsg);