12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012 |
- /* Simple program to layout "physical" memory for new lguest guest.
- * Linked high to avoid likely physical memory. */
- #define _LARGEFILE64_SOURCE
- #define _GNU_SOURCE
- #include <stdio.h>
- #include <string.h>
- #include <unistd.h>
- #include <err.h>
- #include <stdint.h>
- #include <stdlib.h>
- #include <elf.h>
- #include <sys/mman.h>
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <sys/wait.h>
- #include <fcntl.h>
- #include <stdbool.h>
- #include <errno.h>
- #include <ctype.h>
- #include <sys/socket.h>
- #include <sys/ioctl.h>
- #include <sys/time.h>
- #include <time.h>
- #include <netinet/in.h>
- #include <net/if.h>
- #include <linux/sockios.h>
- #include <linux/if_tun.h>
- #include <sys/uio.h>
- #include <termios.h>
- #include <getopt.h>
- #include <zlib.h>
- typedef unsigned long long u64;
- typedef uint32_t u32;
- typedef uint16_t u16;
- typedef uint8_t u8;
- #include "../../include/linux/lguest_launcher.h"
- #include "../../include/asm-i386/e820.h"
- #define PAGE_PRESENT 0x7 /* Present, RW, Execute */
- #define NET_PEERNUM 1
- #define BRIDGE_PFX "bridge:"
- #ifndef SIOCBRADDIF
- #define SIOCBRADDIF 0x89a2 /* add interface to bridge */
- #endif
/* When true, verbose() messages are printed to stdout. */
static bool verbose;
/* printf-style diagnostic which stays silent unless the flag above is set. */
#define verbose(...) \
	do { if (verbose) printf(__VA_ARGS__); } while (0)
/* Descriptor returned by setup_waker(); handle_input() writes fds to it and
 * the console abort path closes it. */
static int waker_fd;
/* All devices created so far, plus the set of fds watched for input. */
struct device_list {
	/* Input descriptors handed to select(). */
	fd_set infds;
	/* Highest descriptor added to infds so far. */
	int max_infd;

	/* Head of the singly linked list of devices. */
	struct device *dev;
	/* Where the pointer to the next appended device gets stored. */
	struct device **lastdev;
};
- struct device
- {
- struct device *next;
- struct lguest_device_desc *desc;
- void *mem;
- /* Watch this fd if handle_input non-NULL. */
- int fd;
- bool (*handle_input)(int fd, struct device *me);
- /* Watch DMA to this key if handle_input non-NULL. */
- unsigned long watch_key;
- u32 (*handle_output)(int fd, const struct iovec *iov,
- unsigned int num, struct device *me);
- /* Device-specific data. */
- void *priv;
- };
/* Open `name` with `flags`.  Returns the descriptor; if open() fails, prints
 * the errno message and exits with status 1. */
static int open_or_die(const char *name, int flags)
{
	int opened = open(name, flags);

	if (opened >= 0)
		return opened;
	err(1, "Failed to open %s", name);
}
/* Map `num` pages of /dev/zero privately (read/write/exec) at exactly `addr`.
 * Returns `addr`; exits with status 1 if the mapping cannot be placed there. */
static void *map_zeroed_pages(unsigned long addr, unsigned int num)
{
	static int zero_fd = -1;
	void *want = (void *)addr;
	void *got;

	/* /dev/zero is opened on first use and kept for later calls. */
	if (zero_fd == -1)
		zero_fd = open_or_die("/dev/zero", O_RDONLY);

	got = mmap(want, getpagesize() * num,
		   PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE,
		   zero_fd, 0);
	if (got != want)
		err(1, "Mmaping %u pages of /dev/zero @%p", num, want);
	return want;
}
/* Find magic string marking entry point, return entry point.
 *
 * Scans [start, end) for "GenuineLguest" and returns the address just past
 * the string, plus page_offset.  Exits with status 1 if it is not found. */
static unsigned long entry_point(void *start, void *end,
				 unsigned long page_offset)
{
	static const char magic[] = "GenuineLguest";
	const size_t magic_len = sizeof(magic) - 1;
	const char *p = start;
	const char *limit = end;

	/* Only compare while a whole magic string still fits below `end`,
	 * so memcmp() never reads beyond the image. */
	while (p < limit && (size_t)(limit - p) >= magic_len) {
		if (memcmp(p, magic, magic_len) == 0)
			return (long)p + magic_len + page_offset;
		p++;
	}
	/* No system call failed here, so errno is not part of the message. */
	errx(1, "Is this image a genuine lguest?");
}
/* Map the PT_LOAD segments of an ELF kernel at their physical addresses.
 *
 * elf_fd:      open descriptor of the kernel image.
 * ehdr:        ELF header already read from elf_fd.
 * page_offset: set to p_vaddr - p_paddr, which must agree for all loaded
 *              segments.
 *
 * Returns the entry point.  Exits with status 1 on a malformed header, an
 * I/O error, inconsistent segment offsets or a failed mapping. */
static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
			     unsigned long *page_offset)
{
	unsigned int i;
	unsigned long start = -1UL, end = 0;

	/* Sanity checks.  These run before the program header array is
	 * declared, because e_phnum determines its size. */
	if (ehdr->e_type != ET_EXEC
	    || ehdr->e_machine != EM_386
	    || ehdr->e_phentsize != sizeof(Elf32_Phdr)
	    || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr))
		errx(1, "Malformed elf header");

	Elf32_Phdr phdr[ehdr->e_phnum];

	if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0)
		err(1, "Seeking to program headers");
	if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
		err(1, "Reading program headers");

	*page_offset = 0;
	/* We map the loadable segments at virtual addresses corresponding
	 * to their physical addresses (our virtual == guest physical). */
	for (i = 0; i < ehdr->e_phnum; i++) {
		void *addr;

		if (phdr[i].p_type != PT_LOAD)
			continue;

		verbose("Section %i: size %i addr %p\n",
			i, phdr[i].p_memsz, (void *)phdr[i].p_paddr);

		/* We expect linear address space. */
		if (!*page_offset)
			*page_offset = phdr[i].p_vaddr - phdr[i].p_paddr;
		else if (*page_offset != phdr[i].p_vaddr - phdr[i].p_paddr)
			errx(1, "Page offset of section %i different", i);

		/* Track the lowest and highest file-backed addresses: that
		 * range is what gets searched for the entry point. */
		if (phdr[i].p_paddr < start)
			start = phdr[i].p_paddr;
		if (phdr[i].p_paddr + phdr[i].p_filesz > end)
			end = phdr[i].p_paddr + phdr[i].p_filesz;

		/* We map everything private, writable. */
		addr = mmap((void *)phdr[i].p_paddr,
			    phdr[i].p_filesz,
			    PROT_READ|PROT_WRITE|PROT_EXEC,
			    MAP_FIXED|MAP_PRIVATE,
			    elf_fd, phdr[i].p_offset);
		if (addr != (void *)phdr[i].p_paddr)
			err(1, "Mmaping vmlinux seg %i gave %p not %p",
			    i, addr, (void *)phdr[i].p_paddr);
	}

	return entry_point((void *)start, (void *)end, *page_offset);
}
/* This is amazingly reliable.
 *
 * Guess the page offset from the image: look at byte 4 of every
 * "mov 0xXXXXXXXX,%eax" (opcode 0xA1) and return the first value seen more
 * than three times, shifted into the top byte.  Exits if none qualifies. */
static unsigned long intuit_page_offset(unsigned char *img, unsigned long len)
{
	unsigned int votes[256] = { 0 };
	unsigned int pos = 0;

	while (pos + 4 < len) {
		if (img[pos] == 0xA1) {
			unsigned char top = img[pos + 4];

			if (++votes[top] > 3)
				return (unsigned long)top << 24;
		}
		pos++;
	}
	errx(1, "could not determine page offset");
}
- static unsigned long unpack_bzimage(int fd, unsigned long *page_offset)
- {
- gzFile f;
- int ret, len = 0;
- void *img = (void *)0x100000;
- f = gzdopen(fd, "rb");
- while ((ret = gzread(f, img + len, 65536)) > 0)
- len += ret;
- if (ret < 0)
- err(1, "reading image from bzImage");
- verbose("Unpacked size %i addr %p\n", len, img);
- *page_offset = intuit_page_offset(img, len);
- return entry_point(img, img + len, *page_offset);
- }
/* Scan `fd` byte by byte for a gzip header (1F 8B, then seven bytes which
 * are skipped, then OS byte 03) and unpack the kernel found there.
 *
 * Returns the entry point via unpack_bzimage(); exits with status 1 if no
 * such header is found before end of file. */
static unsigned long load_bzimage(int fd, unsigned long *page_offset)
{
	unsigned char c;
	int state = 0;

	/* Ugly brute force search for gzip header.  `state` is the number
	 * of header bytes matched so far. */
	while (read(fd, &c, 1) == 1) {
		switch (state) {
		case 0:
			if (c == 0x1F)
				state++;
			break;
		case 1:
			if (c == 0x8B)
				state++;
			else if (c != 0x1F)
				/* A repeated 0x1F may itself start the magic. */
				state = 0;
			break;
		case 2 ... 8:
			state++;
			break;
		case 9:
			if (c == 0x03) { /* Compressed under UNIX. */
				/* Rewind to the first header byte. */
				lseek(fd, -10, SEEK_CUR);
				return unpack_bzimage(fd, page_offset);
			}
			/* False match: resume the search just after its
			 * leading 0x1F so no later header is skipped. */
			lseek(fd, -9, SEEK_CUR);
			state = 0;
			break;
		}
	}
	errx(1, "Could not find kernel in bzImage");
}
/* Load the kernel from `fd`: an image starting with the ELF magic goes to
 * map_elf(), anything else is treated as a bzImage.  Returns the entry point
 * and sets *page_offset. */
static unsigned long load_kernel(int fd, unsigned long *page_offset)
{
	Elf32_Ehdr ehdr;
	bool is_elf;

	if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr))
		err(1, "Reading kernel");

	is_elf = (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0);
	if (!is_elf)
		return load_bzimage(fd, page_offset);
	return map_elf(fd, &ehdr, page_offset);
}
/* Round `addr` up to the next multiple of the page size. */
static inline unsigned long page_align(unsigned long addr)
{
	unsigned long mask = getpagesize() - 1;

	return (addr + mask) & ~mask;
}
/* initrd gets loaded at top of memory: return length.
 *
 * name: path of the initrd file.
 * mem:  size of guest memory; the file is mapped so that its page-aligned
 *       length ends at `mem`.
 *
 * Returns the page-aligned length.  Exits with status 1 if the file cannot
 * be opened, examined or mapped, or if it is larger than guest memory. */
static unsigned long load_initrd(const char *name, unsigned long mem)
{
	int ifd;
	struct stat st;
	unsigned long len;
	void *iaddr;

	ifd = open_or_die(name, O_RDONLY);
	if (fstat(ifd, &st) < 0)
		err(1, "fstat() on initrd '%s'", name);

	len = page_align(st.st_size);
	/* mem - len below would wrap for an oversized initrd. */
	if (len > mem)
		errx(1, "initrd '%s' does not fit in %lu bytes of memory",
		     name, mem);

	iaddr = mmap((void *)mem - len, st.st_size,
		     PROT_READ|PROT_EXEC|PROT_WRITE,
		     MAP_FIXED|MAP_PRIVATE, ifd, 0);
	if (iaddr != (void *)mem - len)
		err(1, "Mmaping initrd '%s' returned %p not %p",
		    name, iaddr, (void *)mem - len);
	close(ifd);
	verbose("mapped initrd %s size=%lu @ %p\n",
		name, (unsigned long)st.st_size, iaddr);
	return len;
}
- static unsigned long setup_pagetables(unsigned long mem,
- unsigned long initrd_size,
- unsigned long page_offset)
- {
- u32 *pgdir, *linear;
- unsigned int mapped_pages, i, linear_pages;
- unsigned int ptes_per_page = getpagesize()/sizeof(u32);
- /* If we can map all of memory above page_offset, we do so. */
- if (mem <= -page_offset)
- mapped_pages = mem/getpagesize();
- else
- mapped_pages = -page_offset/getpagesize();
- /* Each linear PTE page can map ptes_per_page pages. */
- linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page;
- /* We lay out top-level then linear mapping immediately below initrd */
- pgdir = (void *)mem - initrd_size - getpagesize();
- linear = (void *)pgdir - linear_pages*getpagesize();
- for (i = 0; i < mapped_pages; i++)
- linear[i] = ((i * getpagesize()) | PAGE_PRESENT);
- /* Now set up pgd so that this memory is at page_offset */
- for (i = 0; i < mapped_pages; i += ptes_per_page) {
- pgdir[(i + page_offset/getpagesize())/ptes_per_page]
- = (((u32)linear + i*sizeof(u32)) | PAGE_PRESENT);
- }
- verbose("Linear mapping of %u pages in %u pte pages at %p\n",
- mapped_pages, linear_pages, linear);
- return (unsigned long)pgdir;
- }
/* Join the NULL-terminated string array `args` into `dst`, writing a single
 * space after every element (including the last).  With no elements, `dst`
 * becomes the empty string.  `dst` must be large enough; no bound is checked. */
static void concat(char *dst, char *args[])
{
	char *out = dst;
	char **arg;

	for (arg = args; *arg; arg++) {
		size_t n = strlen(*arg);

		memcpy(out, *arg, n);
		out[n] = ' ';
		out += n + 1;
	}
	/* Terminate; this also covers the empty case. */
	*out = '\0';
}
- static int tell_kernel(u32 pgdir, u32 start, u32 page_offset)
- {
- u32 args[] = { LHREQ_INITIALIZE,
- LGUEST_GUEST_TOP/getpagesize(), /* Just below us */
- pgdir, start, page_offset };
- int fd;
- fd = open_or_die("/dev/lguest", O_RDWR);
- if (write(fd, args, sizeof(args)) < 0)
- err(1, "Writing to /dev/lguest");
- return fd;
- }
- static void set_fd(int fd, struct device_list *devices)
- {
- FD_SET(fd, &devices->infds);
- if (fd > devices->max_infd)
- devices->max_infd = fd;
- }
- /* When input arrives, we tell the kernel to kick lguest out with -EAGAIN. */
- static void wake_parent(int pipefd, int lguest_fd, struct device_list *devices)
- {
- set_fd(pipefd, devices);
- for (;;) {
- fd_set rfds = devices->infds;
- u32 args[] = { LHREQ_BREAK, 1 };
- select(devices->max_infd+1, &rfds, NULL, NULL, NULL);
- if (FD_ISSET(pipefd, &rfds)) {
- int ignorefd;
- if (read(pipefd, &ignorefd, sizeof(ignorefd)) == 0)
- exit(0);
- FD_CLR(ignorefd, &devices->infds);
- } else
- write(lguest_fd, args, sizeof(args));
- }
- }
/* Fork the waker child, which runs wake_parent() on the read end of a new
 * pipe.  Returns the write end of that pipe to the parent.  Exits with
 * status 1 if the pipe or the fork fails. */
static int setup_waker(int lguest_fd, struct device_list *device_list)
{
	int pipefd[2], child;

	if (pipe(pipefd) != 0)
		err(1, "creating waker pipe");

	child = fork();
	if (child == -1)
		err(1, "forking");

	if (child == 0) {
		/* Child: keep only the read end; wake_parent() loops forever. */
		close(pipefd[1]);
		wake_parent(pipefd[0], lguest_fd, device_list);
	}

	/* Parent: keep only the write end. */
	close(pipefd[0]);
	return pipefd[1];
}
- static void *_check_pointer(unsigned long addr, unsigned int size,
- unsigned int line)
- {
- if (addr >= LGUEST_GUEST_TOP || addr + size >= LGUEST_GUEST_TOP)
- errx(1, "%s:%i: Invalid address %li", __FILE__, line, addr);
- return (void *)addr;
- }
- #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
- /* Returns pointer to dma->used_len */
- static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num)
- {
- unsigned int i;
- struct lguest_dma *udma;
- udma = check_pointer(dma, sizeof(*udma));
- for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
- if (!udma->len[i])
- break;
- iov[i].iov_base = check_pointer(udma->addr[i], udma->len[i]);
- iov[i].iov_len = udma->len[i];
- }
- *num = i;
- return &udma->used_len;
- }
- static u32 *get_dma_buffer(int fd, void *key,
- struct iovec iov[], unsigned int *num, u32 *irq)
- {
- u32 buf[] = { LHREQ_GETDMA, (u32)key };
- unsigned long udma;
- u32 *res;
- udma = write(fd, buf, sizeof(buf));
- if (udma == (unsigned long)-1)
- return NULL;
- /* Kernel stashes irq in ->used_len. */
- res = dma2iov(udma, iov, num);
- *irq = *res;
- return res;
- }
- static void trigger_irq(int fd, u32 irq)
- {
- u32 buf[] = { LHREQ_IRQ, irq };
- if (write(fd, buf, sizeof(buf)) != 0)
- err(1, "Triggering irq %i", irq);
- }
/* Point `iov` at a single static 1024-byte scratch buffer and set *num to 1,
 * for input that has to be read but thrown away. */
static void discard_iovec(struct iovec *iov, unsigned int *num)
{
	static char scratch[1024];

	iov->iov_len = sizeof(scratch);
	iov->iov_base = scratch;
	*num = 1;
}
/* Terminal settings of stdin as saved by setup_console(). */
static struct termios orig_term;

/* Put stdin back into the saved settings, effective immediately. */
static void restore_term(void)
{
	tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
}
/* State for the "three ^C within a second" console exit sequence. */
struct console_abort {
	/* Consecutive ^C characters seen so far. */
	int count;
	/* Time at which the first of them arrived. */
	struct timeval start;
};
- /* We DMA input to buffer bound at start of console page. */
- static bool handle_console_input(int fd, struct device *dev)
- {
- u32 irq = 0, *lenp;
- int len;
- unsigned int num;
- struct iovec iov[LGUEST_MAX_DMA_SECTIONS];
- struct console_abort *abort = dev->priv;
- lenp = get_dma_buffer(fd, dev->mem, iov, &num, &irq);
- if (!lenp) {
- warn("console: no dma buffer!");
- discard_iovec(iov, &num);
- }
- len = readv(dev->fd, iov, num);
- if (len <= 0) {
- warnx("Failed to get console input, ignoring console.");
- len = 0;
- }
- if (lenp) {
- *lenp = len;
- trigger_irq(fd, irq);
- }
- /* Three ^C within one second? Exit. */
- if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) {
- if (!abort->count++)
- gettimeofday(&abort->start, NULL);
- else if (abort->count == 3) {
- struct timeval now;
- gettimeofday(&now, NULL);
- if (now.tv_sec <= abort->start.tv_sec+1) {
- /* Make sure waker is not blocked in BREAK */
- u32 args[] = { LHREQ_BREAK, 0 };
- close(waker_fd);
- write(fd, args, sizeof(args));
- exit(2);
- }
- abort->count = 0;
- }
- } else
- abort->count = 0;
- if (!len) {
- restore_term();
- return false;
- }
- return true;
- }
- static u32 handle_console_output(int fd, const struct iovec *iov,
- unsigned num, struct device*dev)
- {
- return writev(STDOUT_FILENO, iov, num);
- }
- static u32 handle_tun_output(int fd, const struct iovec *iov,
- unsigned num, struct device *dev)
- {
- /* Now we've seen output, we should warn if we can't get buffers. */
- *(bool *)dev->priv = true;
- return writev(dev->fd, iov, num);
- }
/* Byte offset of peer number `peernum`: four bytes per peer. */
static unsigned long peer_offset(unsigned int peernum)
{
	const unsigned int bytes_per_peer = 4;

	return bytes_per_peer * peernum;
}
- static bool handle_tun_input(int fd, struct device *dev)
- {
- u32 irq = 0, *lenp;
- int len;
- unsigned num;
- struct iovec iov[LGUEST_MAX_DMA_SECTIONS];
- lenp = get_dma_buffer(fd, dev->mem+peer_offset(NET_PEERNUM), iov, &num,
- &irq);
- if (!lenp) {
- if (*(bool *)dev->priv)
- warn("network: no dma buffer!");
- discard_iovec(iov, &num);
- }
- len = readv(dev->fd, iov, num);
- if (len <= 0)
- err(1, "reading network");
- if (lenp) {
- *lenp = len;
- trigger_irq(fd, irq);
- }
- verbose("tun input packet len %i [%02x %02x] (%s)\n", len,
- ((u8 *)iov[0].iov_base)[0], ((u8 *)iov[0].iov_base)[1],
- lenp ? "sent" : "discarded");
- return true;
- }
- static u32 handle_block_output(int fd, const struct iovec *iov,
- unsigned num, struct device *dev)
- {
- struct lguest_block_page *p = dev->mem;
- u32 irq, *lenp;
- unsigned int len, reply_num;
- struct iovec reply[LGUEST_MAX_DMA_SECTIONS];
- off64_t device_len, off = (off64_t)p->sector * 512;
- device_len = *(off64_t *)dev->priv;
- if (off >= device_len)
- err(1, "Bad offset %llu vs %llu", off, device_len);
- if (lseek64(dev->fd, off, SEEK_SET) != off)
- err(1, "Bad seek to sector %i", p->sector);
- verbose("Block: %s at offset %llu\n", p->type ? "WRITE" : "READ", off);
- lenp = get_dma_buffer(fd, dev->mem, reply, &reply_num, &irq);
- if (!lenp)
- err(1, "Block request didn't give us a dma buffer");
- if (p->type) {
- len = writev(dev->fd, iov, num);
- if (off + len > device_len) {
- ftruncate(dev->fd, device_len);
- errx(1, "Write past end %llu+%u", off, len);
- }
- *lenp = 0;
- } else {
- len = readv(dev->fd, reply, reply_num);
- *lenp = len;
- }
- p->result = 1 + (p->bytes != len);
- trigger_irq(fd, irq);
- return 0;
- }
- static void handle_output(int fd, unsigned long dma, unsigned long key,
- struct device_list *devices)
- {
- struct device *i;
- u32 *lenp;
- struct iovec iov[LGUEST_MAX_DMA_SECTIONS];
- unsigned num = 0;
- lenp = dma2iov(dma, iov, &num);
- for (i = devices->dev; i; i = i->next) {
- if (i->handle_output && key == i->watch_key) {
- *lenp = i->handle_output(fd, iov, num, i);
- return;
- }
- }
- warnx("Pending dma %p, key %p", (void *)dma, (void *)key);
- }
- static void handle_input(int fd, struct device_list *devices)
- {
- struct timeval poll = { .tv_sec = 0, .tv_usec = 0 };
- for (;;) {
- struct device *i;
- fd_set fds = devices->infds;
- if (select(devices->max_infd+1, &fds, NULL, NULL, &poll) == 0)
- break;
- for (i = devices->dev; i; i = i->next) {
- if (i->handle_input && FD_ISSET(i->fd, &fds)) {
- if (!i->handle_input(fd, i)) {
- FD_CLR(i->fd, &devices->infds);
- /* Tell waker to ignore it too... */
- write(waker_fd, &i->fd, sizeof(i->fd));
- }
- }
- }
- }
- }
- static struct lguest_device_desc *new_dev_desc(u16 type, u16 features,
- u16 num_pages)
- {
- static unsigned long top = LGUEST_GUEST_TOP;
- struct lguest_device_desc *desc;
- desc = malloc(sizeof(*desc));
- desc->type = type;
- desc->num_pages = num_pages;
- desc->features = features;
- desc->status = 0;
- if (num_pages) {
- top -= num_pages*getpagesize();
- map_zeroed_pages(top, num_pages);
- desc->pfn = top / getpagesize();
- } else
- desc->pfn = 0;
- return desc;
- }
- static struct device *new_device(struct device_list *devices,
- u16 type, u16 num_pages, u16 features,
- int fd,
- bool (*handle_input)(int, struct device *),
- unsigned long watch_off,
- u32 (*handle_output)(int,
- const struct iovec *,
- unsigned,
- struct device *))
- {
- struct device *dev = malloc(sizeof(*dev));
- /* Append to device list. */
- *devices->lastdev = dev;
- dev->next = NULL;
- devices->lastdev = &dev->next;
- dev->fd = fd;
- if (handle_input)
- set_fd(dev->fd, devices);
- dev->desc = new_dev_desc(type, features, num_pages);
- dev->mem = (void *)(dev->desc->pfn * getpagesize());
- dev->handle_input = handle_input;
- dev->watch_key = (unsigned long)dev->mem + watch_off;
- dev->handle_output = handle_output;
- return dev;
- }
- static void setup_console(struct device_list *devices)
- {
- struct device *dev;
- if (tcgetattr(STDIN_FILENO, &orig_term) == 0) {
- struct termios term = orig_term;
- term.c_lflag &= ~(ISIG|ICANON|ECHO);
- tcsetattr(STDIN_FILENO, TCSANOW, &term);
- atexit(restore_term);
- }
- /* We don't currently require a page for the console. */
- dev = new_device(devices, LGUEST_DEVICE_T_CONSOLE, 0, 0,
- STDIN_FILENO, handle_console_input,
- LGUEST_CONSOLE_DMA_KEY, handle_console_output);
- dev->priv = malloc(sizeof(struct console_abort));
- ((struct console_abort *)dev->priv)->count = 0;
- verbose("device %p: console\n",
- (void *)(dev->desc->pfn * getpagesize()));
- }
- static void setup_block_file(const char *filename, struct device_list *devices)
- {
- int fd;
- struct device *dev;
- off64_t *device_len;
- struct lguest_block_page *p;
- fd = open_or_die(filename, O_RDWR|O_LARGEFILE|O_DIRECT);
- dev = new_device(devices, LGUEST_DEVICE_T_BLOCK, 1,
- LGUEST_DEVICE_F_RANDOMNESS,
- fd, NULL, 0, handle_block_output);
- device_len = dev->priv = malloc(sizeof(*device_len));
- *device_len = lseek64(fd, 0, SEEK_END);
- p = dev->mem;
- p->num_sectors = *device_len/512;
- verbose("device %p: block %i sectors\n",
- (void *)(dev->desc->pfn * getpagesize()), p->num_sectors);
- }
- /* We use fnctl locks to reserve network slots (autocleanup!) */
- static unsigned int find_slot(int netfd, const char *filename)
- {
- struct flock fl;
- fl.l_type = F_WRLCK;
- fl.l_whence = SEEK_SET;
- fl.l_len = 1;
- for (fl.l_start = 0;
- fl.l_start < getpagesize()/sizeof(struct lguest_net);
- fl.l_start++) {
- if (fcntl(netfd, F_SETLK, &fl) == 0)
- return fl.l_start;
- }
- errx(1, "No free slots in network file %s", filename);
- }
- static void setup_net_file(const char *filename,
- struct device_list *devices)
- {
- int netfd;
- struct device *dev;
- netfd = open(filename, O_RDWR, 0);
- if (netfd < 0) {
- if (errno == ENOENT) {
- netfd = open(filename, O_RDWR|O_CREAT, 0600);
- if (netfd >= 0) {
- char page[getpagesize()];
- memset(page, 0, sizeof(page));
- write(netfd, page, sizeof(page));
- }
- }
- if (netfd < 0)
- err(1, "cannot open net file '%s'", filename);
- }
- dev = new_device(devices, LGUEST_DEVICE_T_NET, 1,
- find_slot(netfd, filename)|LGUEST_NET_F_NOCSUM,
- -1, NULL, 0, NULL);
- /* We overwrite the /dev/zero mapping with the actual file. */
- if (mmap(dev->mem, getpagesize(), PROT_READ|PROT_WRITE,
- MAP_FIXED|MAP_SHARED, netfd, 0) != dev->mem)
- err(1, "could not mmap '%s'", filename);
- verbose("device %p: shared net %s, peer %i\n",
- (void *)(dev->desc->pfn * getpagesize()), filename,
- dev->desc->features & ~LGUEST_NET_F_NOCSUM);
- }
/* Convert a dotted-quad string ("a.b.c.d") to a 32-bit address with `a` in
 * the most significant byte.  Exits with status 1 unless four decimal fields
 * are present and each is at most 255. */
static u32 str2ip(const char *ipaddr)
{
	unsigned int byte[4];
	unsigned int i;

	/* All four fields must convert, or some of byte[] stays unset. */
	if (sscanf(ipaddr, "%u.%u.%u.%u",
		   &byte[0], &byte[1], &byte[2], &byte[3]) != 4)
		errx(1, "Bad IP address '%s'", ipaddr);
	for (i = 0; i < 4; i++)
		if (byte[i] > 255)
			errx(1, "Bad IP address '%s'", ipaddr);

	return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3];
}
- /* adapted from libbridge */
- static void add_to_bridge(int fd, const char *if_name, const char *br_name)
- {
- int ifidx;
- struct ifreq ifr;
- if (!*br_name)
- errx(1, "must specify bridge name");
- ifidx = if_nametoindex(if_name);
- if (!ifidx)
- errx(1, "interface %s does not exist!", if_name);
- strncpy(ifr.ifr_name, br_name, IFNAMSIZ);
- ifr.ifr_ifindex = ifidx;
- if (ioctl(fd, SIOCBRADDIF, &ifr) < 0)
- err(1, "can't add %s to bridge %s", if_name, br_name);
- }
- static void configure_device(int fd, const char *devname, u32 ipaddr,
- unsigned char hwaddr[6])
- {
- struct ifreq ifr;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
- memset(&ifr, 0, sizeof(ifr));
- strcpy(ifr.ifr_name, devname);
- sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = htonl(ipaddr);
- if (ioctl(fd, SIOCSIFADDR, &ifr) != 0)
- err(1, "Setting %s interface address", devname);
- ifr.ifr_flags = IFF_UP;
- if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0)
- err(1, "Bringing interface %s up", devname);
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
- err(1, "getting hw address for %s", devname);
- memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6);
- }
- static void setup_tun_net(const char *arg, struct device_list *devices)
- {
- struct device *dev;
- struct ifreq ifr;
- int netfd, ipfd;
- u32 ip;
- const char *br_name = NULL;
- netfd = open_or_die("/dev/net/tun", O_RDWR);
- memset(&ifr, 0, sizeof(ifr));
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- strcpy(ifr.ifr_name, "tap%d");
- if (ioctl(netfd, TUNSETIFF, &ifr) != 0)
- err(1, "configuring /dev/net/tun");
- ioctl(netfd, TUNSETNOCSUM, 1);
- /* You will be peer 1: we should create enough jitter to randomize */
- dev = new_device(devices, LGUEST_DEVICE_T_NET, 1,
- NET_PEERNUM|LGUEST_DEVICE_F_RANDOMNESS, netfd,
- handle_tun_input, peer_offset(0), handle_tun_output);
- dev->priv = malloc(sizeof(bool));
- *(bool *)dev->priv = false;
- ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
- if (ipfd < 0)
- err(1, "opening IP socket");
- if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) {
- ip = INADDR_ANY;
- br_name = arg + strlen(BRIDGE_PFX);
- add_to_bridge(ipfd, ifr.ifr_name, br_name);
- } else
- ip = str2ip(arg);
- /* We are peer 0, ie. first slot. */
- configure_device(ipfd, ifr.ifr_name, ip, dev->mem);
- /* Set "promisc" bit: we want every single packet. */
- *((u8 *)dev->mem) |= 0x1;
- close(ipfd);
- verbose("device %p: tun net %u.%u.%u.%u\n",
- (void *)(dev->desc->pfn * getpagesize()),
- (u8)(ip>>24), (u8)(ip>>16), (u8)(ip>>8), (u8)ip);
- if (br_name)
- verbose("attached to bridge: %s\n", br_name);
- }
- /* Now we know how much memory we have, we copy in device descriptors */
- static void map_device_descriptors(struct device_list *devs, unsigned long mem)
- {
- struct device *i;
- unsigned int num;
- struct lguest_device_desc *descs;
- /* Device descriptor array sits just above top of normal memory */
- descs = map_zeroed_pages(mem, 1);
- for (i = devs->dev, num = 0; i; i = i->next, num++) {
- if (num == LGUEST_MAX_DEVICES)
- errx(1, "too many devices");
- verbose("Device %i: %s\n", num,
- i->desc->type == LGUEST_DEVICE_T_NET ? "net"
- : i->desc->type == LGUEST_DEVICE_T_CONSOLE ? "console"
- : i->desc->type == LGUEST_DEVICE_T_BLOCK ? "block"
- : "unknown");
- descs[num] = *i->desc;
- free(i->desc);
- i->desc = &descs[num];
- }
- }
- static void __attribute__((noreturn))
- run_guest(int lguest_fd, struct device_list *device_list)
- {
- for (;;) {
- u32 args[] = { LHREQ_BREAK, 0 };
- unsigned long arr[2];
- int readval;
- /* We read from the /dev/lguest device to run the Guest. */
- readval = read(lguest_fd, arr, sizeof(arr));
- if (readval == sizeof(arr)) {
- handle_output(lguest_fd, arr[0], arr[1], device_list);
- continue;
- } else if (errno == ENOENT) {
- char reason[1024] = { 0 };
- read(lguest_fd, reason, sizeof(reason)-1);
- errx(1, "%s", reason);
- } else if (errno != EAGAIN)
- err(1, "Running guest failed");
- handle_input(lguest_fd, device_list);
- if (write(lguest_fd, args, sizeof(args)) < 0)
- err(1, "Resetting break");
- }
- }
/* Long options accepted by main(); each maps to a single-character code.
 * Only --verbose takes no argument. */
static struct option opts[] = {
	{ "verbose",  no_argument,       NULL, 'v' },
	{ "sharenet", required_argument, NULL, 's' },
	{ "tunnet",   required_argument, NULL, 't' },
	{ "block",    required_argument, NULL, 'b' },
	{ "initrd",   required_argument, NULL, 'i' },
	{ NULL, 0, NULL, 0 },
};
/* Print the command-line synopsis and exit with status 1. */
static void usage(void)
{
	static const char synopsis[] =
		"Usage: lguest [--verbose] "
		"[--sharenet=<filename>|--tunnet=(<ipaddr>|bridge:<bridgename>)\n"
		"|--block=<filename>|--initrd=<filename>]...\n"
		"<mem-in-mb> vmlinux [args...]";

	errx(1, "%s", synopsis);
}
- int main(int argc, char *argv[])
- {
- unsigned long mem, pgdir, start, page_offset, initrd_size = 0;
- int c, lguest_fd;
- struct device_list device_list;
- void *boot = (void *)0;
- const char *initrd_name = NULL;
- device_list.max_infd = -1;
- device_list.dev = NULL;
- device_list.lastdev = &device_list.dev;
- FD_ZERO(&device_list.infds);
- while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) {
- switch (c) {
- case 'v':
- verbose = true;
- break;
- case 's':
- setup_net_file(optarg, &device_list);
- break;
- case 't':
- setup_tun_net(optarg, &device_list);
- break;
- case 'b':
- setup_block_file(optarg, &device_list);
- break;
- case 'i':
- initrd_name = optarg;
- break;
- default:
- warnx("Unknown argument %s", argv[optind]);
- usage();
- }
- }
- if (optind + 2 > argc)
- usage();
- /* We need a console device */
- setup_console(&device_list);
- /* First we map /dev/zero over all of guest-physical memory. */
- mem = atoi(argv[optind]) * 1024 * 1024;
- map_zeroed_pages(0, mem / getpagesize());
- /* Now we load the kernel */
- start = load_kernel(open_or_die(argv[optind+1], O_RDONLY),
- &page_offset);
- /* Write the device descriptors into memory. */
- map_device_descriptors(&device_list, mem);
- /* Map the initrd image if requested */
- if (initrd_name) {
- initrd_size = load_initrd(initrd_name, mem);
- *(unsigned long *)(boot+0x218) = mem - initrd_size;
- *(unsigned long *)(boot+0x21c) = initrd_size;
- *(unsigned char *)(boot+0x210) = 0xFF;
- }
- /* Set up the initial linar pagetables. */
- pgdir = setup_pagetables(mem, initrd_size, page_offset);
- /* E820 memory map: ours is a simple, single region. */
- *(char*)(boot+E820NR) = 1;
- *((struct e820entry *)(boot+E820MAP))
- = ((struct e820entry) { 0, mem, E820_RAM });
- /* Command line pointer and command line (at 4096) */
- *(void **)(boot + 0x228) = boot + 4096;
- concat(boot + 4096, argv+optind+2);
- /* Paravirt type: 1 == lguest */
- *(int *)(boot + 0x23c) = 1;
- lguest_fd = tell_kernel(pgdir, start, page_offset);
- waker_fd = setup_waker(lguest_fd, &device_list);
- run_guest(lguest_fd, &device_list);
- }
|