
/*
 *  linux/mm/nommu.c
 *
 *  Replacement code for mm functions to support CPUs that don't
 *  have any form of memory management unit (thus no virtual memory).
 *
 *  See Documentation/nommu-mmap.txt
 *
 *  Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
 *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
 *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
 *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
 */

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/ptrace.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>

#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

void *high_memory;
struct page *mem_map;
unsigned long max_mapnr;
unsigned long num_physpages;
unsigned long askedalloc, realalloc;
atomic_t vm_committed_space = ATOMIC_INIT(0);
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
int heap_stack_gap = 0;

EXPORT_SYMBOL(mem_map);
EXPORT_SYMBOL(sysctl_max_map_count);
EXPORT_SYMBOL(sysctl_overcommit_memory);
EXPORT_SYMBOL(sysctl_overcommit_ratio);
EXPORT_SYMBOL(vm_committed_space);
EXPORT_SYMBOL(__vm_enough_memory);

/* list of shareable VMAs */
struct rb_root nommu_vma_tree = RB_ROOT;
DECLARE_RWSEM(nommu_vma_sem);

struct vm_operations_struct generic_file_vm_ops = {
};

EXPORT_SYMBOL(vmalloc);
EXPORT_SYMBOL(vfree);
EXPORT_SYMBOL(vmalloc_to_page);
EXPORT_SYMBOL(vmalloc_32);

/*
 * Handle all mappings that got truncated by a "truncate()"
 * system call.
 *
 * NOTE! We have to be ready to update the memory sharing
 * between the file and the memory map for a potential last
 * incomplete page.  Ugly, but necessary.
 */
int vmtruncate(struct inode *inode, loff_t offset)
{
	struct address_space *mapping = inode->i_mapping;
	unsigned long limit;

	if (inode->i_size < offset)
		goto do_expand;
	i_size_write(inode, offset);

	truncate_inode_pages(mapping, offset);
	goto out_truncate;

do_expand:
	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
	if (limit != RLIM_INFINITY && offset > limit)
		goto out_sig;
	if (offset > inode->i_sb->s_maxbytes)
		goto out;
	i_size_write(inode, offset);

out_truncate:
	if (inode->i_op && inode->i_op->truncate)
		inode->i_op->truncate(inode);
	return 0;
out_sig:
	send_sig(SIGXFSZ, current, 0);
out:
	return -EFBIG;
}

EXPORT_SYMBOL(vmtruncate);

/*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
 *
 * Doesn't have to be accurate, i.e. may have races.
 */
unsigned int kobjsize(const void *objp)
{
	struct page *page;

	if (!objp || !((page = virt_to_page(objp))))
		return 0;

	if (PageSlab(page))
		return ksize(objp);

	BUG_ON(page->index < 0);
	BUG_ON(page->index >= MAX_ORDER);

	return (PAGE_SIZE << page->index);
}
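
/*
 * Worked example (illustrative only; assumes a typical power-of-two slab
 * size cache):
 *
 *	void *p = kmalloc(33000, GFP_KERNEL);	-- served from the 64KB cache
 *	kobjsize(p);				-- reports 65536, not 33000
 *
 * The gap between the requested and the real size is exactly what the
 * askedalloc/realalloc counters above keep track of.
 */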

/*
 * The nommu dodgy version :-)
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		   unsigned long start, int len, int write, int force,
		   struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	static struct vm_area_struct dummy_vma;

	for (i = 0; i < len; i++) {
		if (pages) {
			pages[i] = virt_to_page(start);
			if (pages[i])
				page_cache_get(pages[i]);
		}
		if (vmas)
			vmas[i] = &dummy_vma;
		start += PAGE_SIZE;
	}
	return i;
}

EXPORT_SYMBOL(get_user_pages);

DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;

void vfree(void *addr)
{
	kfree(addr);
}

void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask,
		pgprot_t prot)
{
	/*
	 * kmalloc doesn't like __GFP_HIGHMEM for some reason
	 */
	return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM);
}

struct page *vmalloc_to_page(void *addr)
{
	return virt_to_page(addr);
}

unsigned long vmalloc_to_pfn(void *addr)
{
	return page_to_pfn(virt_to_page(addr));
}

long vread(char *buf, char *addr, unsigned long count)
{
	memcpy(buf, addr, count);
	return count;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	memcpy(addr, buf, count);
	return count;
}
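
/*
 * Worked example of the overflow clamp above (illustrative, 32-bit
 * addresses assumed): with addr == 0xfffff000 and count == 0x2000 the sum
 * wraps around to 0x1000, which is less than count, so count is reduced to
 * -(unsigned long) addr == 0x1000 and the copy stops exactly at the top of
 * the address space instead of wrapping.
 */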

/*
 *	vmalloc  -  allocate virtually contiguous memory
 *
 *	@size:		allocation size
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}

/*
 *	vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 *
 *	@size:		allocation size
 *
 *	Allocate enough 32bit PA addressable pages to cover @size from the
 *	page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}

void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
	BUG();
	return NULL;
}

void vunmap(void *addr)
{
	BUG();
}

/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something nasty
 *  like trying to un-brk an area that has already been mapped
 *  to a regular file.  in this case, the unmapping will need
 *  to invoke file system routines that need the global lock.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
	struct mm_struct *mm = current->mm;

	if (brk < mm->start_brk || brk > mm->context.end_brk)
		return mm->brk;

	if (mm->brk == brk)
		return mm->brk;

	/*
	 * Always allow shrinking brk
	 */
	if (brk <= mm->brk) {
		mm->brk = brk;
		return brk;
	}

	/*
	 * Ok, looks good - let it rip.
	 */
	return mm->brk = brk;
}
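
/*
 * Usage sketch (illustrative, userspace side): with no MMU the heap can
 * only move inside the region reserved between mm->start_brk and
 * mm->context.end_brk at exec time, so a growing break simply stops once
 * that region is exhausted, e.g.
 *
 *	if (sbrk(65536) == (void *) -1)
 *		-- the break hit mm->context.end_brk; the allocator must
 *		   fall back to separate mmap()/malloc() allocations --
 */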

#ifdef DEBUG
static void show_process_blocks(void)
{
	struct vm_list_struct *vml;

	printk("Process blocks %d:", current->pid);

	for (vml = &current->mm->context.vmlist; vml; vml = vml->next) {
		printk(" %p: %p", vml, vml->vma);
		if (vml->vma)
			printk(" (%d @%lx #%d)",
			       kobjsize((void *) vml->vma->vm_start),
			       vml->vma->vm_start,
			       atomic_read(&vml->vma->vm_usage));
		printk(vml->next ? " ->" : ".\n");
	}
}
#endif /* DEBUG */

static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
{
	struct vm_area_struct *vma;
	struct rb_node *n = nommu_vma_tree.rb_node;

	while (n) {
		vma = rb_entry(n, struct vm_area_struct, vm_rb);

		if (start < vma->vm_start)
			n = n->rb_left;
		else if (start > vma->vm_start)
			n = n->rb_right;
		else
			return vma;
	}

	return NULL;
}

static void add_nommu_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *pvma;
	struct address_space *mapping;
	struct rb_node **p = &nommu_vma_tree.rb_node;
	struct rb_node *parent = NULL;

	/* add the VMA to the mapping */
	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;

		flush_dcache_mmap_lock(mapping);
		vma_prio_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}

	/* add the VMA to the master list */
	while (*p) {
		parent = *p;
		pvma = rb_entry(parent, struct vm_area_struct, vm_rb);

		if (vma->vm_start < pvma->vm_start) {
			p = &(*p)->rb_left;
		}
		else if (vma->vm_start > pvma->vm_start) {
			p = &(*p)->rb_right;
		}
		else {
			/* mappings are at the same address - this can only
			 * happen for shared-mem chardevs and shared file
			 * mappings backed by ramfs/tmpfs */
			BUG_ON(!(pvma->vm_flags & VM_SHARED));

			if (vma < pvma)
				p = &(*p)->rb_left;
			else if (vma > pvma)
				p = &(*p)->rb_right;
			else
				BUG();
		}
	}

	rb_link_node(&vma->vm_rb, parent, p);
	rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
}

static void delete_nommu_vma(struct vm_area_struct *vma)
{
	struct address_space *mapping;

	/* remove the VMA from the mapping */
	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;

		flush_dcache_mmap_lock(mapping);
		vma_prio_tree_remove(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}

	/* remove from the master list */
	rb_erase(&vma->vm_rb, &nommu_vma_tree);
}

/*
 * determine whether a mapping should be permitted and, if so, what sort of
 * mapping we're capable of supporting
 */
static int validate_mmap_request(struct file *file,
				 unsigned long addr,
				 unsigned long len,
				 unsigned long prot,
				 unsigned long flags,
				 unsigned long pgoff,
				 unsigned long *_capabilities)
{
	unsigned long capabilities;
	unsigned long reqprot = prot;
	int ret;

	/* do the simple checks first */
	if (flags & MAP_FIXED || addr) {
		printk(KERN_DEBUG
		       "%d: Can't do fixed-address/overlay mmap of RAM\n",
		       current->pid);
		return -EINVAL;
	}

	if ((flags & MAP_TYPE) != MAP_PRIVATE &&
	    (flags & MAP_TYPE) != MAP_SHARED)
		return -EINVAL;

	if (PAGE_ALIGN(len) == 0)
		return addr;

	if (len > TASK_SIZE)
		return -EINVAL;

	/* offset overflow? */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EINVAL;

	if (file) {
		/* validate file mapping requests */
		struct address_space *mapping;

		/* files must support mmap */
		if (!file->f_op || !file->f_op->mmap)
			return -ENODEV;

		/* work out if what we've got could possibly be shared
		 * - we support chardevs that provide their own "memory"
		 * - we support files/blockdevs that are memory backed
		 */
		mapping = file->f_mapping;
		if (!mapping)
			mapping = file->f_dentry->d_inode->i_mapping;

		capabilities = 0;
		if (mapping && mapping->backing_dev_info)
			capabilities = mapping->backing_dev_info->capabilities;

		if (!capabilities) {
			/* no explicit capabilities set, so assume some
			 * defaults */
			switch (file->f_dentry->d_inode->i_mode & S_IFMT) {
			case S_IFREG:
			case S_IFBLK:
				capabilities = BDI_CAP_MAP_COPY;
				break;

			case S_IFCHR:
				capabilities =
					BDI_CAP_MAP_DIRECT |
					BDI_CAP_READ_MAP |
					BDI_CAP_WRITE_MAP;
				break;

			default:
				return -EINVAL;
			}
		}

		/* eliminate any capabilities that we can't support on this
		 * device */
		if (!file->f_op->get_unmapped_area)
			capabilities &= ~BDI_CAP_MAP_DIRECT;
		if (!file->f_op->read)
			capabilities &= ~BDI_CAP_MAP_COPY;

		if (flags & MAP_SHARED) {
			/* do checks for writing, appending and locking */
			if ((prot & PROT_WRITE) &&
			    !(file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (IS_APPEND(file->f_dentry->d_inode) &&
			    (file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (locks_verify_locked(file->f_dentry->d_inode))
				return -EAGAIN;

			if (!(capabilities & BDI_CAP_MAP_DIRECT))
				return -ENODEV;

			if (((prot & PROT_READ)  && !(capabilities & BDI_CAP_READ_MAP))  ||
			    ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) ||
			    ((prot & PROT_EXEC)  && !(capabilities & BDI_CAP_EXEC_MAP))
			    ) {
				printk("MAP_SHARED not completely supported on !MMU\n");
				return -EINVAL;
			}

			/* we mustn't privatise shared mappings */
			capabilities &= ~BDI_CAP_MAP_COPY;
		}
		else {
			/* we're going to read the file into private memory we
			 * allocate */
			if (!(capabilities & BDI_CAP_MAP_COPY))
				return -ENODEV;

			/* we don't permit a private writable mapping to be
			 * shared with the backing device */
			if (prot & PROT_WRITE)
				capabilities &= ~BDI_CAP_MAP_DIRECT;
		}

		/* handle executable mappings and implied executable
		 * mappings */
		if (file->f_vfsmnt->mnt_flags & MNT_NOEXEC) {
			if (prot & PROT_EXEC)
				return -EPERM;
		}
		else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
			/* handle implication of PROT_EXEC by PROT_READ */
			if (current->personality & READ_IMPLIES_EXEC) {
				if (capabilities & BDI_CAP_EXEC_MAP)
					prot |= PROT_EXEC;
			}
		}
		else if ((prot & PROT_READ) &&
			 (prot & PROT_EXEC) &&
			 !(capabilities & BDI_CAP_EXEC_MAP)
			 ) {
			/* backing file is not executable, try to copy */
			capabilities &= ~BDI_CAP_MAP_DIRECT;
		}
	}
	else {
		/* anonymous mappings are always memory backed and can be
		 * privately mapped
		 */
		capabilities = BDI_CAP_MAP_COPY;

		/* handle PROT_EXEC implication by PROT_READ */
		if ((prot & PROT_READ) &&
		    (current->personality & READ_IMPLIES_EXEC))
			prot |= PROT_EXEC;
	}

	/* allow the security API to have its say */
	ret = security_file_mmap(file, reqprot, prot, flags);
	if (ret < 0)
		return ret;

	/* looks okay */
	*_capabilities = capabilities;
	return 0;
}

/*
 * we've determined that we can make the mapping, now translate what we
 * now know into VMA flags
 */
static unsigned long determine_vm_flags(struct file *file,
					unsigned long prot,
					unsigned long flags,
					unsigned long capabilities)
{
	unsigned long vm_flags;

	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags);
	vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
	/* vm_flags |= mm->def_flags; */

	if (!(capabilities & BDI_CAP_MAP_DIRECT)) {
		/* attempt to share read-only copies of mapped file chunks */
		if (file && !(prot & PROT_WRITE))
			vm_flags |= VM_MAYSHARE;
	}
	else {
		/* overlay a shareable mapping on the backing device or inode
		 * if possible - used for chardevs, ramfs/tmpfs/shmfs and
		 * romfs/cramfs */
		if (flags & MAP_SHARED)
			vm_flags |= VM_MAYSHARE | VM_SHARED;
		else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0)
			vm_flags |= VM_MAYSHARE;
	}

	/* refuse to let anyone share private mappings with this process if
	 * it's being traced - otherwise breakpoints set in it may interfere
	 * with another untraced process
	 */
	if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED))
		vm_flags &= ~VM_MAYSHARE;

	return vm_flags;
}

/*
 * set up a shared mapping on a file
 */
static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
{
	int ret;

	ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
	if (ret != -ENOSYS)
		return ret;

	/* getting an ENOSYS error indicates that direct mmap isn't
	 * possible (as opposed to tried but failed) so we'll fall
	 * through to making a private copy of the data and mapping
	 * that if we can */
	return -ENODEV;
}

/*
 * set up a private mapping or an anonymous shared mapping
 */
static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
{
	void *base;
	int ret;

	/* invoke the file's mapping function so that it can keep track of
	 * shared mappings on devices or memory
	 * - VM_MAYSHARE will be set if it may attempt to share
	 */
	if (vma->vm_file) {
		ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
		if (ret != -ENOSYS) {
			/* shouldn't return success if we're not sharing */
			BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE));
			return ret; /* success or a real error */
		}

		/* getting an ENOSYS error indicates that direct mmap isn't
		 * possible (as opposed to tried but failed) so we'll try to
		 * make a private copy of the data and map that instead */
	}

	/* allocate some memory to hold the mapping
	 * - note that this may not return a page-aligned address if the object
	 *   we're allocating is smaller than a page
	 */
	base = kmalloc(len, GFP_KERNEL);
	if (!base)
		goto enomem;

	vma->vm_start = (unsigned long) base;
	vma->vm_end = vma->vm_start + len;
	vma->vm_flags |= VM_MAPPED_COPY;

#ifdef WARN_ON_SLACK
	if (len + WARN_ON_SLACK <= kobjsize(base))
		printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
		       len, current->pid, kobjsize(base) - len);
#endif

	if (vma->vm_file) {
		/* read the contents of a file into the copy */
		mm_segment_t old_fs;
		loff_t fpos;

		fpos = vma->vm_pgoff;
		fpos <<= PAGE_SHIFT;

		old_fs = get_fs();
		set_fs(KERNEL_DS);
		ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
		set_fs(old_fs);

		if (ret < 0)
			goto error_free;

		/* clear the last little bit */
		if (ret < len)
			memset(base + ret, 0, len - ret);

	} else {
		/* if it's an anonymous mapping, then just clear it */
		memset(base, 0, len);
	}

	return 0;

error_free:
	kfree(base);
	vma->vm_start = 0;
	return ret;

enomem:
	printk("Allocation of length %lu from process %d failed\n",
	       len, current->pid);
	show_free_areas();
	return -ENOMEM;
}
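
/*
 * Consequence of the copy made above (illustrative sketch, userspace side):
 * a MAP_PRIVATE file mapping on a no-MMU kernel is a one-off kmalloc'd
 * snapshot of the file, so something like
 *
 *	char *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 *
 * returns a buffer that does not track later changes to the file, and the
 * address is only page-aligned if the allocator happens to make it so.
 */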

/*
 * handle mapping creation for uClinux
 */
unsigned long do_mmap_pgoff(struct file *file,
			    unsigned long addr,
			    unsigned long len,
			    unsigned long prot,
			    unsigned long flags,
			    unsigned long pgoff)
{
	struct vm_list_struct *vml = NULL;
	struct vm_area_struct *vma = NULL;
	struct rb_node *rb;
	unsigned long capabilities, vm_flags;
	void *result;
	int ret;

	/* decide whether we should attempt the mapping, and if so what sort of
	 * mapping */
	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
				    &capabilities);
	if (ret < 0)
		return ret;

	/* we've determined that we can make the mapping, now translate what we
	 * now know into VMA flags */
	vm_flags = determine_vm_flags(file, prot, flags, capabilities);

	/* we're going to need to record the mapping if it works */
	vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
	if (!vml)
		goto error_getting_vml;
	memset(vml, 0, sizeof(*vml));

	down_write(&nommu_vma_sem);

	/* if we want to share, we need to check for VMAs created by other
	 * mmap() calls that overlap with our proposed mapping
	 * - we can only share with an exact match on most regular files
	 * - shared mappings on character devices and memory backed files are
	 *   permitted to overlap inexactly as far as we are concerned, for in
	 *   these cases sharing is handled in the driver or filesystem rather
	 *   than here
	 */
	if (vm_flags & VM_MAYSHARE) {
		unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long vmpglen;

		for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
			vma = rb_entry(rb, struct vm_area_struct, vm_rb);

			if (!(vma->vm_flags & VM_MAYSHARE))
				continue;

			/* search for overlapping mappings on the same file */
			if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode)
				continue;

			if (vma->vm_pgoff >= pgoff + pglen)
				continue;

			vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1;
			vmpglen >>= PAGE_SHIFT;
			if (pgoff >= vma->vm_pgoff + vmpglen)
				continue;

			/* handle inexactly overlapping matches between mappings */
			if (vma->vm_pgoff != pgoff || vmpglen != pglen) {
				if (!(capabilities & BDI_CAP_MAP_DIRECT))
					goto sharing_violation;
				continue;
			}

			/* we've found a VMA we can share */
			atomic_inc(&vma->vm_usage);

			vml->vma = vma;
			result = (void *) vma->vm_start;
			goto shared;
		}

		vma = NULL;

		/* obtain the address at which to make a shared mapping
		 * - this is the hook for quasi-memory character devices to
		 *   tell us the location of a shared mapping
		 */
		if (file && file->f_op->get_unmapped_area) {
			addr = file->f_op->get_unmapped_area(file, addr, len,
							     pgoff, flags);
			if (IS_ERR((void *) addr)) {
				ret = addr;
				if (ret != (unsigned long) -ENOSYS)
					goto error;

				/* the driver refused to tell us where to site
				 * the mapping so we'll have to attempt to copy
				 * it */
				ret = (unsigned long) -ENODEV;
				if (!(capabilities & BDI_CAP_MAP_COPY))
					goto error;

				capabilities &= ~BDI_CAP_MAP_DIRECT;
			}
		}
	}

	/* we're going to need a VMA struct as well */
	vma = kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
	if (!vma)
		goto error_getting_vma;

	memset(vma, 0, sizeof(*vma));
	INIT_LIST_HEAD(&vma->anon_vma_node);
	atomic_set(&vma->vm_usage, 1);
	if (file)
		get_file(file);
	vma->vm_file = file;
	vma->vm_flags = vm_flags;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_pgoff = pgoff;

	vml->vma = vma;

	/* set up the mapping */
	if (file && vma->vm_flags & VM_SHARED)
		ret = do_mmap_shared_file(vma, len);
	else
		ret = do_mmap_private(vma, len);
	if (ret < 0)
		goto error;

	/* okay... we have a mapping; now we have to register it */
	result = (void *) vma->vm_start;

	if (vma->vm_flags & VM_MAPPED_COPY) {
		realalloc += kobjsize(result);
		askedalloc += len;
	}

	realalloc += kobjsize(vma);
	askedalloc += sizeof(*vma);

	current->mm->total_vm += len >> PAGE_SHIFT;

	add_nommu_vma(vma);

shared:
	realalloc += kobjsize(vml);
	askedalloc += sizeof(*vml);

	vml->next = current->mm->context.vmlist;
	current->mm->context.vmlist = vml;

	up_write(&nommu_vma_sem);

	if (prot & PROT_EXEC)
		flush_icache_range((unsigned long) result,
				   (unsigned long) result + len);

#ifdef DEBUG
	printk("do_mmap:\n");
	show_process_blocks();
#endif

	return (unsigned long) result;

error:
	up_write(&nommu_vma_sem);
	kfree(vml);
	if (vma) {
		fput(vma->vm_file);
		kfree(vma);
	}
	return ret;

sharing_violation:
	up_write(&nommu_vma_sem);
	printk("Attempt to share mismatched mappings\n");
	kfree(vml);
	return -EINVAL;

error_getting_vma:
	up_write(&nommu_vma_sem);
	kfree(vml);
	printk("Allocation of vma for %lu byte allocation from process %d failed\n",
	       len, current->pid);
	show_free_areas();
	return -ENOMEM;

error_getting_vml:
	printk("Allocation of vml for %lu byte allocation from process %d failed\n",
	       len, current->pid);
	show_free_areas();
	return -ENOMEM;
}
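
/*
 * Sharing sketch (illustrative): two processes that mmap() the same ramfs
 * or tmpfs file with MAP_SHARED and an identical offset and length end up
 * on the exact-match path above, so both receive the same address and the
 * second caller merely bumps vm_usage on the existing VMA instead of
 * allocating new backing memory.
 */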

/*
 * handle mapping disposal for uClinux
 */
static void put_vma(struct vm_area_struct *vma)
{
	if (vma) {
		down_write(&nommu_vma_sem);

		if (atomic_dec_and_test(&vma->vm_usage)) {
			delete_nommu_vma(vma);

			if (vma->vm_ops && vma->vm_ops->close)
				vma->vm_ops->close(vma);

			/* IO memory and memory shared directly out of the pagecache from
			 * ramfs/tmpfs mustn't be released here */
			if (vma->vm_flags & VM_MAPPED_COPY) {
				realalloc -= kobjsize((void *) vma->vm_start);
				askedalloc -= vma->vm_end - vma->vm_start;
				kfree((void *) vma->vm_start);
			}

			realalloc -= kobjsize(vma);
			askedalloc -= sizeof(*vma);

			if (vma->vm_file)
				fput(vma->vm_file);
			kfree(vma);
		}

		up_write(&nommu_vma_sem);
	}
}

int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
	struct vm_list_struct *vml, **parent;
	unsigned long end = addr + len;

#ifdef DEBUG
	printk("do_munmap:\n");
#endif

	for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next)
		if ((*parent)->vma->vm_start == addr &&
		    ((len == 0) || ((*parent)->vma->vm_end == end)))
			goto found;

	printk("munmap of non-mmaped memory by process %d (%s): %p\n",
	       current->pid, current->comm, (void *) addr);
	return -EINVAL;

found:
	vml = *parent;

	put_vma(vml->vma);

	*parent = vml->next;
	realalloc -= kobjsize(vml);
	askedalloc -= sizeof(*vml);
	kfree(vml);

	mm->total_vm -= len >> PAGE_SHIFT;

#ifdef DEBUG
	show_process_blocks();
#endif

	return 0;
}
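
/*
 * Behaviour sketch (illustrative): unlike the MMU case, a region can only
 * be unmapped as a whole - the address must be exactly what mmap()
 * returned and the length must match (or be zero), so something like
 *
 *	munmap(p + 4096, length - 4096);
 *
 * to trim a mapping returns -EINVAL here rather than splitting the VMA.
 */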

/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
	struct vm_list_struct *tmp;

	if (mm) {
#ifdef DEBUG
		printk("Exit_mmap:\n");
#endif

		mm->total_vm = 0;

		while ((tmp = mm->context.vmlist)) {
			mm->context.vmlist = tmp->next;
			put_vma(tmp->vma);

			realalloc -= kobjsize(tmp);
			askedalloc -= sizeof(*tmp);
			kfree(tmp);
		}

#ifdef DEBUG
		show_process_blocks();
#endif
	}
}

asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
	int ret;
	struct mm_struct *mm = current->mm;

	down_write(&mm->mmap_sem);
	ret = do_munmap(mm, addr, len);
	up_write(&mm->mmap_sem);
	return ret;
}

unsigned long do_brk(unsigned long addr, unsigned long len)
{
	return -ENOMEM;
}

/*
 * Expand (or shrink) an existing mapping, potentially moving it at the
 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
 * This option implies MREMAP_MAYMOVE.
 *
 * on uClinux, we only permit changing a mapping's size, and only as long as
 * it stays within the hole allocated by the kmalloc() call in
 * do_mmap_pgoff() and the block is not shareable
 */
unsigned long do_mremap(unsigned long addr,
			unsigned long old_len, unsigned long new_len,
			unsigned long flags, unsigned long new_addr)
{
	struct vm_list_struct *vml = NULL;

	/* insanity checks first */
	if (new_len == 0)
		return (unsigned long) -EINVAL;

	if (flags & MREMAP_FIXED && new_addr != addr)
		return (unsigned long) -EINVAL;

	for (vml = current->mm->context.vmlist; vml; vml = vml->next)
		if (vml->vma->vm_start == addr)
			goto found;

	return (unsigned long) -EINVAL;

found:
	if (vml->vma->vm_end != vml->vma->vm_start + old_len)
		return (unsigned long) -EFAULT;

	if (vml->vma->vm_flags & VM_MAYSHARE)
		return (unsigned long) -EPERM;

	if (new_len > kobjsize((void *) addr))
		return (unsigned long) -ENOMEM;

	/* all checks complete - do it */
	vml->vma->vm_end = vml->vma->vm_start + new_len;

	askedalloc -= old_len;
	askedalloc += new_len;

	return vml->vma->vm_start;
}
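
/*
 * Worked example (illustrative, power-of-two slab cache assumed): a private
 * anonymous mapping created with len == 33000 actually sits in a 65536-byte
 * kmalloc block, so
 *
 *	mremap(p, 33000, 60000, 0)	resizes in place, while
 *	mremap(p, 33000, 70000, 0)	fails with -ENOMEM
 *
 * because the new length must still fit inside kobjsize() of the original
 * allocation; the mapping is never moved.
 */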

/*
 * Look up the first VMA which satisfies addr < vm_end, NULL if none
 */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_list_struct *vml;

	for (vml = mm->context.vmlist; vml; vml = vml->next)
		if (addr >= vml->vma->vm_start && addr < vml->vma->vm_end)
			return vml->vma;

	return NULL;
}

EXPORT_SYMBOL(find_vma);

struct page *follow_page(struct mm_struct *mm, unsigned long addr, int write)
{
	return NULL;
}

struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	return NULL;
}

int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
		    unsigned long to, unsigned long size, pgprot_t prot)
{
	vma->vm_start = vma->vm_pgoff << PAGE_SHIFT;
	return 0;
}

void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
}

unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
				     unsigned long len, unsigned long pgoff,
				     unsigned long flags)
{
	return -ENOMEM;
}

void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
{
}

void update_mem_hiwater(struct task_struct *tsk)
{
	unsigned long rss;

	if (likely(tsk->mm)) {
		rss = get_mm_counter(tsk->mm, rss);

		if (tsk->mm->hiwater_rss < rss)
			tsk->mm->hiwater_rss = rss;
		if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
			tsk->mm->hiwater_vm = tsk->mm->total_vm;
	}
}

void unmap_mapping_range(struct address_space *mapping,
			 loff_t const holebegin, loff_t const holelen,
			 int even_cows)
{
}

/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(long pages, int cap_sys_admin)
{
	unsigned long free, allowed;

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		unsigned long n;

		free = get_page_cache_size();
		free += nr_swap_pages;

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure.  The dentry
		 * cache and most inode caches should fall into this
		 */
		free += atomic_read(&slab_reclaim_pages);

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			free -= free / 32;

		if (free > pages)
			return 0;

		/*
		 * nr_free_pages() is very expensive on large systems,
		 * only call if we're about to fail.
		 */
		n = nr_free_pages();
		if (!cap_sys_admin)
			n -= n / 32;
		free += n;

		if (free > pages)
			return 0;
		vm_unacct_memory(pages);
		return -ENOMEM;
	}

	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
	/*
	 * Leave the last 3% for root
	 */
	if (!cap_sys_admin)
		allowed -= allowed / 32;
	allowed += total_swap_pages;

	/* Don't let a single process grow too big:
	   leave 3% of the size of this process for other processes */
	allowed -= current->mm->total_vm / 32;

	/*
	 * cast `allowed' as a signed long because vm_committed_space
	 * sometimes has a negative value
	 */
	if (atomic_read(&vm_committed_space) < (long)allowed)
		return 0;

	vm_unacct_memory(pages);
	return -ENOMEM;
}
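
/*
 * Worked example for the strict (OVERCOMMIT_NEVER) branch above
 * (illustrative figures): with 32768 pages of RAM, no swap,
 * sysctl_overcommit_ratio == 50 and no CAP_SYS_ADMIN,
 *
 *	allowed  = 32768 * 50 / 100           = 16384 pages
 *	allowed -= allowed / 32  (root's 3%)  = 15872 pages
 *
 * minus 3% of the caller's total_vm; further requests are refused once
 * vm_committed_space reaches that figure.
 */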

int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}