123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648 |
- /*
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * The User Datagram Protocol (UDP).
- *
- * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $
- *
- * Authors: Ross Biro
- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- * Alan Cox, <Alan.Cox@linux.org>
- * Hirokazu Takahashi, <taka@valinux.co.jp>
- *
- * Fixes:
- * Alan Cox : verify_area() calls
- * Alan Cox : stopped close while in use off icmp
- * messages. Not a fix but a botch that
- * for udp at least is 'valid'.
- * Alan Cox : Fixed icmp handling properly
- * Alan Cox : Correct error for oversized datagrams
- * Alan Cox : Tidied select() semantics.
- * Alan Cox : udp_err() fixed properly, also now
- * select and read wake correctly on errors
- * Alan Cox : udp_send verify_area moved to avoid mem leak
- * Alan Cox : UDP can count its memory
- * Alan Cox : send to an unknown connection causes
- * an ECONNREFUSED off the icmp, but
- * does NOT close.
- * Alan Cox : Switched to new sk_buff handlers. No more backlog!
- * Alan Cox : Using generic datagram code. Even smaller and the PEEK
- * bug no longer crashes it.
- * Fred Van Kempen : Net2e support for sk->broadcast.
- * Alan Cox : Uses skb_free_datagram
- * Alan Cox : Added get/set sockopt support.
- * Alan Cox : Broadcasting without option set returns EACCES.
- * Alan Cox : No wakeup calls. Instead we now use the callbacks.
- * Alan Cox : Use ip_tos and ip_ttl
- * Alan Cox : SNMP Mibs
- * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
- * Matt Dillon : UDP length checks.
- * Alan Cox : Smarter af_inet used properly.
- * Alan Cox : Use new kernel side addressing.
- * Alan Cox : Incorrect return on truncated datagram receive.
- * Arnt Gulbrandsen : New udp_send and stuff
- * Alan Cox : Cache last socket
- * Alan Cox : Route cache
- * Jon Peatfield : Minor efficiency fix to sendto().
- * Mike Shaver : RFC1122 checks.
- * Alan Cox : Nonblocking error fix.
- * Willy Konynenberg : Transparent proxying support.
- * Mike McLagan : Routing by source
- * David S. Miller : New socket lookup architecture.
- * Last socket cache retained as it
- * does have a high hit rate.
- * Olaf Kirch : Don't linearise iovec on sendmsg.
- * Andi Kleen : Some cleanups, cache destination entry
- * for connect.
- * Vitaly E. Lavrov : Transparent proxy revived after year coma.
- * Melvin Smith : Check msg_name not msg_namelen in sendto(),
- * return ENOTCONN for unconnected sockets (POSIX)
- * Janos Farkas : don't deliver multi/broadcasts to a different
- * bound-to-device socket
- * Hirokazu Takahashi : HW checksumming for outgoing UDP
- * datagrams.
- * Hirokazu Takahashi : sendfile() on UDP works now.
- * Arnaldo C. Melo : convert /proc/net/udp to seq_file
- * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
- * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind
- * a single port at the same time.
- * Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
- * James Chapman : Add L2TP encapsulation type.
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
- #include <asm/system.h>
- #include <asm/uaccess.h>
- #include <asm/ioctls.h>
- #include <linux/bootmem.h>
- #include <linux/types.h>
- #include <linux/fcntl.h>
- #include <linux/module.h>
- #include <linux/socket.h>
- #include <linux/sockios.h>
- #include <linux/igmp.h>
- #include <linux/in.h>
- #include <linux/errno.h>
- #include <linux/timer.h>
- #include <linux/mm.h>
- #include <linux/inet.h>
- #include <linux/netdevice.h>
- #include <net/tcp_states.h>
- #include <linux/skbuff.h>
- #include <linux/proc_fs.h>
- #include <linux/seq_file.h>
- #include <net/net_namespace.h>
- #include <net/icmp.h>
- #include <net/route.h>
- #include <net/checksum.h>
- #include <net/xfrm.h>
- #include "udp_impl.h"
- /*
- * Snmp MIB for the UDP layer
- */
- DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
- EXPORT_SYMBOL(udp_statistics);
- DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
- EXPORT_SYMBOL(udp_stats_in6);
- struct hlist_head udp_hash[UDP_HTABLE_SIZE];
- DEFINE_RWLOCK(udp_hash_lock);
- int sysctl_udp_mem[3] __read_mostly;
- int sysctl_udp_rmem_min __read_mostly;
- int sysctl_udp_wmem_min __read_mostly;
- EXPORT_SYMBOL(sysctl_udp_mem);
- EXPORT_SYMBOL(sysctl_udp_rmem_min);
- EXPORT_SYMBOL(sysctl_udp_wmem_min);
- atomic_t udp_memory_allocated;
- EXPORT_SYMBOL(udp_memory_allocated);
- static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
- const struct hlist_head udptable[])
- {
- struct sock *sk;
- struct hlist_node *node;
- sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
- if (sk->sk_net == net && sk->sk_hash == num)
- return 1;
- return 0;
- }
- /**
- * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
- *
- * @sk: socket struct in question
- * @snum: port number to look up
- * @udptable: hash list table, must be of UDP_HTABLE_SIZE
- * @saddr_comp: AF-dependent comparison of bound local IP addresses
- */
- int __udp_lib_get_port(struct sock *sk, unsigned short snum,
- struct hlist_head udptable[],
- int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2 ) )
- {
- struct hlist_node *node;
- struct hlist_head *head;
- struct sock *sk2;
- int error = 1;
- struct net *net = sk->sk_net;
- write_lock_bh(&udp_hash_lock);
- if (!snum) {
- int i, low, high, remaining;
- unsigned rover, best, best_size_so_far;
- inet_get_local_port_range(&low, &high);
- remaining = (high - low) + 1;
- best_size_so_far = UINT_MAX;
- best = rover = net_random() % remaining + low;
- /* 1st pass: look for empty (or shortest) hash chain */
- for (i = 0; i < UDP_HTABLE_SIZE; i++) {
- int size = 0;
- head = &udptable[rover & (UDP_HTABLE_SIZE - 1)];
- if (hlist_empty(head))
- goto gotit;
- sk_for_each(sk2, node, head) {
- if (++size >= best_size_so_far)
- goto next;
- }
- best_size_so_far = size;
- best = rover;
- next:
- /* fold back if end of range */
- if (++rover > high)
- rover = low + ((rover - low)
- & (UDP_HTABLE_SIZE - 1));
- }
- /* 2nd pass: find hole in shortest hash chain */
- rover = best;
- for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
- if (! __udp_lib_lport_inuse(net, rover, udptable))
- goto gotit;
- rover += UDP_HTABLE_SIZE;
- if (rover > high)
- rover = low + ((rover - low)
- & (UDP_HTABLE_SIZE - 1));
- }
- /* All ports in use! */
- goto fail;
- gotit:
- snum = rover;
- } else {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
- sk_for_each(sk2, node, head)
- if (sk2->sk_hash == snum &&
- sk2 != sk &&
- sk2->sk_net == net &&
- (!sk2->sk_reuse || !sk->sk_reuse) &&
- (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
- || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (*saddr_comp)(sk, sk2) )
- goto fail;
- }
- inet_sk(sk)->num = snum;
- sk->sk_hash = snum;
- if (sk_unhashed(sk)) {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
- sk_add_node(sk, head);
- sock_prot_inuse_add(sk->sk_prot, 1);
- }
- error = 0;
- fail:
- write_unlock_bh(&udp_hash_lock);
- return error;
- }
- int udp_get_port(struct sock *sk, unsigned short snum,
- int (*scmp)(const struct sock *, const struct sock *))
- {
- return __udp_lib_get_port(sk, snum, udp_hash, scmp);
- }
- /*
- * IOCTL requests applicable to the UDP protocol
- */
- int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
- {
- switch (cmd) {
- case SIOCOUTQ:
- {
- int amount = atomic_read(&sk->sk_wmem_alloc);
- return put_user(amount, (int __user *)arg);
- }
- case SIOCINQ:
- {
- struct sk_buff *skb;
- unsigned long amount;
- amount = 0;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- skb = skb_peek(&sk->sk_receive_queue);
- if (skb != NULL) {
- /*
- * We will only return the amount
- * of this packet since that is all
- * that will be read.
- */
- amount = skb->len - sizeof(struct udphdr);
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- return put_user(amount, (int __user *)arg);
- }
- default:
- return -ENOIOCTLCMD;
- }
- return 0;
- }
- int udp_disconnect(struct sock *sk, int flags)
- {
- struct inet_sock *inet = inet_sk(sk);
- /*
- * 1003.1g - break association.
- */
- sk->sk_state = TCP_CLOSE;
- inet->daddr = 0;
- inet->dport = 0;
- sk->sk_bound_dev_if = 0;
- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
- inet_reset_saddr(sk);
- if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
- sk->sk_prot->unhash(sk);
- inet->sport = 0;
- }
- sk_dst_reset(sk);
- return 0;
- }
- /*
- * Socket option code for UDP
- */
- int udp_lib_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen,
- int (*push_pending_frames)(struct sock *))
- {
- struct udp_sock *up = udp_sk(sk);
- int val;
- int err = 0;
- #ifdef CONFIG_IP_UDPLITE
- int is_udplite = IS_UDPLITE(sk);
- #endif
- if (optlen<sizeof(int))
- return -EINVAL;
- if (get_user(val, (int __user *)optval))
- return -EFAULT;
- switch (optname) {
- case UDP_CORK:
- if (val != 0) {
- up->corkflag = 1;
- } else {
- up->corkflag = 0;
- lock_sock(sk);
- (*push_pending_frames)(sk);
- release_sock(sk);
- }
- break;
- case UDP_ENCAP:
- switch (val) {
- case 0:
- case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- up->encap_rcv = xfrm4_udp_encap_rcv;
- /* FALLTHROUGH */
- case UDP_ENCAP_L2TPINUDP:
- up->encap_type = val;
- break;
- default:
- err = -ENOPROTOOPT;
- break;
- }
- break;
- #ifdef CONFIG_IP_UDPLITE
- /*
- * UDP-Lite's partial checksum coverage (RFC 3828).
- */
- /* The sender sets actual checksum coverage length via this option.
- * The case coverage > packet length is handled by send module. */
- case UDPLITE_SEND_CSCOV:
- if (!is_udplite) /* Disable the option on UDP sockets */
- return -ENOPROTOOPT;
- if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
- val = 8;
- up->pcslen = val;
- up->pcflag |= UDPLITE_SEND_CC;
- break;
- /* The receiver specifies a minimum checksum coverage value. To make
- * sense, this should be set to at least 8 (as done below). If zero is
- * used, this again means full checksum coverage. */
- case UDPLITE_RECV_CSCOV:
- if (!is_udplite) /* Disable the option on UDP sockets */
- return -ENOPROTOOPT;
- if (val != 0 && val < 8) /* Avoid silly minimal values. */
- val = 8;
- up->pcrlen = val;
- up->pcflag |= UDPLITE_RECV_CC;
- break;
- #endif
- default:
- err = -ENOPROTOOPT;
- break;
- }
- return err;
- }
- int udp_lib_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
- {
- struct udp_sock *up = udp_sk(sk);
- int val, len;
- if (get_user(len,optlen))
- return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
- if (len < 0)
- return -EINVAL;
- switch (optname) {
- case UDP_CORK:
- val = up->corkflag;
- break;
- case UDP_ENCAP:
- val = up->encap_type;
- break;
- /* The following two cannot be changed on UDP sockets, the return is
- * always 0 (which corresponds to the full checksum coverage of UDP). */
- case UDPLITE_SEND_CSCOV:
- val = up->pcslen;
- break;
- case UDPLITE_RECV_CSCOV:
- val = up->pcrlen;
- break;
- default:
- return -ENOPROTOOPT;
- }
- if (put_user(len, optlen))
- return -EFAULT;
- if (copy_to_user(optval, &val,len))
- return -EFAULT;
- return 0;
- }
- /**
- * udp_poll - wait for a UDP event.
- * @file - file struct
- * @sock - socket
- * @wait - poll table
- *
- * This is same as datagram poll, except for the special case of
- * blocking sockets. If application is using a blocking fd
- * and a packet with checksum error is in the queue;
- * then it could get return from select indicating data available
- * but then block when reading it. Add special case code
- * to work around these arguably broken applications.
- */
- unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
- {
- unsigned int mask = datagram_poll(file, sock, wait);
- struct sock *sk = sock->sk;
- int is_lite = IS_UDPLITE(sk);
- /* Check for false positives due to checksum errors */
- if ( (mask & POLLRDNORM) &&
- !(file->f_flags & O_NONBLOCK) &&
- !(sk->sk_shutdown & RCV_SHUTDOWN)){
- struct sk_buff_head *rcvq = &sk->sk_receive_queue;
- struct sk_buff *skb;
- spin_lock_bh(&rcvq->lock);
- while ((skb = skb_peek(rcvq)) != NULL &&
- udp_lib_checksum_complete(skb)) {
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
- __skb_unlink(skb, rcvq);
- kfree_skb(skb);
- }
- spin_unlock_bh(&rcvq->lock);
- /* nothing to see, move along */
- if (skb == NULL)
- mask &= ~(POLLIN | POLLRDNORM);
- }
- return mask;
- }
- /* ------------------------------------------------------------------------ */
- #ifdef CONFIG_PROC_FS
- static struct sock *udp_get_first(struct seq_file *seq)
- {
- struct sock *sk;
- struct udp_iter_state *state = seq->private;
- for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
- struct hlist_node *node;
- sk_for_each(sk, node, state->hashtable + state->bucket) {
- if (sk->sk_family == state->family)
- goto found;
- }
- }
- sk = NULL;
- found:
- return sk;
- }
- static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
- {
- struct udp_iter_state *state = seq->private;
- do {
- sk = sk_next(sk);
- try_again:
- ;
- } while (sk && sk->sk_family != state->family);
- if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
- sk = sk_head(state->hashtable + state->bucket);
- goto try_again;
- }
- return sk;
- }
- static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
- {
- struct sock *sk = udp_get_first(seq);
- if (sk)
- while (pos && (sk = udp_get_next(seq, sk)) != NULL)
- --pos;
- return pos ? NULL : sk;
- }
- static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(udp_hash_lock)
- {
- read_lock(&udp_hash_lock);
- return *pos ? udp_get_idx(seq, *pos-1) : (void *)1;
- }
- static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
- {
- struct sock *sk;
- if (v == (void *)1)
- sk = udp_get_idx(seq, 0);
- else
- sk = udp_get_next(seq, v);
- ++*pos;
- return sk;
- }
- static void udp_seq_stop(struct seq_file *seq, void *v)
- __releases(udp_hash_lock)
- {
- read_unlock(&udp_hash_lock);
- }
- static int udp_seq_open(struct inode *inode, struct file *file)
- {
- struct udp_seq_afinfo *afinfo = PDE(inode)->data;
- struct seq_file *seq;
- int rc = -ENOMEM;
- struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- goto out;
- s->family = afinfo->family;
- s->hashtable = afinfo->hashtable;
- s->seq_ops.start = udp_seq_start;
- s->seq_ops.next = udp_seq_next;
- s->seq_ops.show = afinfo->seq_show;
- s->seq_ops.stop = udp_seq_stop;
- rc = seq_open(file, &s->seq_ops);
- if (rc)
- goto out_kfree;
- seq = file->private_data;
- seq->private = s;
- out:
- return rc;
- out_kfree:
- kfree(s);
- goto out;
- }
- /* ------------------------------------------------------------------------ */
- int udp_proc_register(struct udp_seq_afinfo *afinfo)
- {
- struct proc_dir_entry *p;
- int rc = 0;
- if (!afinfo)
- return -EINVAL;
- afinfo->seq_fops->owner = afinfo->owner;
- afinfo->seq_fops->open = udp_seq_open;
- afinfo->seq_fops->read = seq_read;
- afinfo->seq_fops->llseek = seq_lseek;
- afinfo->seq_fops->release = seq_release_private;
- p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops);
- if (p)
- p->data = afinfo;
- else
- rc = -ENOMEM;
- return rc;
- }
- void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
- {
- if (!afinfo)
- return;
- proc_net_remove(&init_net, afinfo->name);
- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
- }
- #endif /* CONFIG_PROC_FS */
- void __init udp_init(void)
- {
- unsigned long limit;
- /* Set the pressure threshold up by the same strategy of TCP. It is a
- * fraction of global memory that is up to 1/2 at 256 MB, decreasing
- * toward zero with the amount of memory, with a floor of 128 pages.
- */
- limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
- limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
- limit = max(limit, 128UL);
- sysctl_udp_mem[0] = limit / 4 * 3;
- sysctl_udp_mem[1] = limit;
- sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
- sysctl_udp_rmem_min = SK_MEM_QUANTUM;
- sysctl_udp_wmem_min = SK_MEM_QUANTUM;
- }
/* Symbols made available to modules: hash table and its lock ... */
EXPORT_SYMBOL(udp_hash);
EXPORT_SYMBOL(udp_hash_lock);

/* ... and the protocol entry points defined in this file. */
EXPORT_SYMBOL(udp_disconnect);
EXPORT_SYMBOL(udp_get_port);
EXPORT_SYMBOL(udp_ioctl);
EXPORT_SYMBOL(udp_lib_getsockopt);
EXPORT_SYMBOL(udp_lib_setsockopt);
EXPORT_SYMBOL(udp_poll);

#ifdef CONFIG_PROC_FS
/* /proc registration helpers only exist when procfs is built in */
EXPORT_SYMBOL(udp_proc_register);
EXPORT_SYMBOL(udp_proc_unregister);
#endif
|