ubd_kern.c 39 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669
  1. /*
  2. * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
  3. * Licensed under the GPL
  4. */
  5. /* 2001-09-28...2002-04-17
  6. * Partition stuff by James_McMechan@hotmail.com
  7. * old style ubd by setting UBD_SHIFT to 0
  8. * 2002-09-27...2002-10-18 massive tinkering for 2.5
  9. * partitions have changed in 2.5
  10. * 2003-01-29 more tinkering for 2.5.59-1
  11. * This should now address the sysfs problems and has
  12. * the symlink for devfs to allow for booting with
  13. * the common /dev/ubd/discX/... names rather than
  14. * only /dev/ubdN/discN this version also has lots of
  15. * clean ups preparing for ubd-many.
  16. * James McMechan
  17. */
  18. #define MAJOR_NR UBD_MAJOR
  19. #define UBD_SHIFT 4
  20. #include "linux/config.h"
  21. #include "linux/module.h"
  22. #include "linux/blkdev.h"
  23. #include "linux/hdreg.h"
  24. #include "linux/init.h"
  25. #include "linux/devfs_fs_kernel.h"
  26. #include "linux/cdrom.h"
  27. #include "linux/proc_fs.h"
  28. #include "linux/ctype.h"
  29. #include "linux/capability.h"
  30. #include "linux/mm.h"
  31. #include "linux/vmalloc.h"
  32. #include "linux/blkpg.h"
  33. #include "linux/genhd.h"
  34. #include "linux/spinlock.h"
  35. #include "asm/segment.h"
  36. #include "asm/uaccess.h"
  37. #include "asm/irq.h"
  38. #include "asm/types.h"
  39. #include "asm/tlbflush.h"
  40. #include "user_util.h"
  41. #include "mem_user.h"
  42. #include "kern_util.h"
  43. #include "kern.h"
  44. #include "mconsole_kern.h"
  45. #include "init.h"
  46. #include "irq_user.h"
  47. #include "irq_kern.h"
  48. #include "ubd_user.h"
  49. #include "2_5compat.h"
  50. #include "os.h"
  51. #include "mem.h"
  52. #include "mem_kern.h"
  53. #include "cow.h"
  54. enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP };
  55. struct io_thread_req {
  56. enum ubd_req op;
  57. int fds[2];
  58. unsigned long offsets[2];
  59. unsigned long long offset;
  60. unsigned long length;
  61. char *buffer;
  62. int sectorsize;
  63. unsigned long sector_mask;
  64. unsigned long long cow_offset;
  65. unsigned long bitmap_words[2];
  66. int map_fd;
  67. unsigned long long map_offset;
  68. int error;
  69. };
  70. extern int open_ubd_file(char *file, struct openflags *openflags,
  71. char **backing_file_out, int *bitmap_offset_out,
  72. unsigned long *bitmap_len_out, int *data_offset_out,
  73. int *create_cow_out);
  74. extern int create_cow_file(char *cow_file, char *backing_file,
  75. struct openflags flags, int sectorsize,
  76. int alignment, int *bitmap_offset_out,
  77. unsigned long *bitmap_len_out,
  78. int *data_offset_out);
  79. extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
  80. extern void do_io(struct io_thread_req *req);
  81. static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  82. {
  83. __u64 n;
  84. int bits, off;
  85. bits = sizeof(data[0]) * 8;
  86. n = bit / bits;
  87. off = bit % bits;
  88. return((data[n] & (1 << off)) != 0);
  89. }
  90. static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  91. {
  92. __u64 n;
  93. int bits, off;
  94. bits = sizeof(data[0]) * 8;
  95. n = bit / bits;
  96. off = bit % bits;
  97. data[n] |= (1 << off);
  98. }
  99. /*End stuff from ubd_user.h*/
  100. #define DRIVER_NAME "uml-blkdev"
  101. static DEFINE_SPINLOCK(ubd_io_lock);
  102. static DEFINE_SPINLOCK(ubd_lock);
  103. static void (*do_ubd)(void);
  104. static int ubd_open(struct inode * inode, struct file * filp);
  105. static int ubd_release(struct inode * inode, struct file * file);
  106. static int ubd_ioctl(struct inode * inode, struct file * file,
  107. unsigned int cmd, unsigned long arg);
  108. #define MAX_DEV (8)
  109. /* Changed in early boot */
  110. static int ubd_do_mmap = 0;
  111. #define UBD_MMAP_BLOCK_SIZE PAGE_SIZE
  112. static struct block_device_operations ubd_blops = {
  113. .owner = THIS_MODULE,
  114. .open = ubd_open,
  115. .release = ubd_release,
  116. .ioctl = ubd_ioctl,
  117. };
  118. /* Protected by the queue_lock */
  119. static request_queue_t *ubd_queue;
  120. /* Protected by ubd_lock */
  121. static int fake_major = MAJOR_NR;
  122. static struct gendisk *ubd_gendisk[MAX_DEV];
  123. static struct gendisk *fake_gendisk[MAX_DEV];
  124. #ifdef CONFIG_BLK_DEV_UBD_SYNC
  125. #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
  126. .cl = 1 })
  127. #else
  128. #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
  129. .cl = 1 })
  130. #endif
  131. /* Not protected - changed only in ubd_setup_common and then only to
  132. * to enable O_SYNC.
  133. */
  134. static struct openflags global_openflags = OPEN_FLAGS;
  135. struct cow {
  136. char *file;
  137. int fd;
  138. unsigned long *bitmap;
  139. unsigned long bitmap_len;
  140. int bitmap_offset;
  141. int data_offset;
  142. };
  143. struct ubd {
  144. char *file;
  145. int count;
  146. int fd;
  147. __u64 size;
  148. struct openflags boot_openflags;
  149. struct openflags openflags;
  150. int no_cow;
  151. struct cow cow;
  152. struct platform_device pdev;
  153. int map_writes;
  154. int map_reads;
  155. int nomap_writes;
  156. int nomap_reads;
  157. int write_maps;
  158. };
  159. #define DEFAULT_COW { \
  160. .file = NULL, \
  161. .fd = -1, \
  162. .bitmap = NULL, \
  163. .bitmap_offset = 0, \
  164. .data_offset = 0, \
  165. }
  166. #define DEFAULT_UBD { \
  167. .file = NULL, \
  168. .count = 0, \
  169. .fd = -1, \
  170. .size = -1, \
  171. .boot_openflags = OPEN_FLAGS, \
  172. .openflags = OPEN_FLAGS, \
  173. .no_cow = 0, \
  174. .cow = DEFAULT_COW, \
  175. .map_writes = 0, \
  176. .map_reads = 0, \
  177. .nomap_writes = 0, \
  178. .nomap_reads = 0, \
  179. .write_maps = 0, \
  180. }
  181. struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
  182. static int ubd0_init(void)
  183. {
  184. struct ubd *dev = &ubd_dev[0];
  185. if(dev->file == NULL)
  186. dev->file = "root_fs";
  187. return(0);
  188. }
  189. __initcall(ubd0_init);
  190. /* Only changed by fake_ide_setup which is a setup */
  191. static int fake_ide = 0;
  192. static struct proc_dir_entry *proc_ide_root = NULL;
  193. static struct proc_dir_entry *proc_ide = NULL;
  194. static void make_proc_ide(void)
  195. {
  196. proc_ide_root = proc_mkdir("ide", NULL);
  197. proc_ide = proc_mkdir("ide0", proc_ide_root);
  198. }
  199. static int proc_ide_read_media(char *page, char **start, off_t off, int count,
  200. int *eof, void *data)
  201. {
  202. int len;
  203. strcpy(page, "disk\n");
  204. len = strlen("disk\n");
  205. len -= off;
  206. if (len < count){
  207. *eof = 1;
  208. if (len <= 0) return 0;
  209. }
  210. else len = count;
  211. *start = page + off;
  212. return len;
  213. }
  214. static void make_ide_entries(char *dev_name)
  215. {
  216. struct proc_dir_entry *dir, *ent;
  217. char name[64];
  218. if(proc_ide_root == NULL) make_proc_ide();
  219. dir = proc_mkdir(dev_name, proc_ide);
  220. if(!dir) return;
  221. ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
  222. if(!ent) return;
  223. ent->nlink = 1;
  224. ent->data = NULL;
  225. ent->read_proc = proc_ide_read_media;
  226. ent->write_proc = NULL;
  227. sprintf(name,"ide0/%s", dev_name);
  228. proc_symlink(dev_name, proc_ide_root, name);
  229. }
  230. static int fake_ide_setup(char *str)
  231. {
  232. fake_ide = 1;
  233. return(1);
  234. }
  235. __setup("fake_ide", fake_ide_setup);
  236. __uml_help(fake_ide_setup,
  237. "fake_ide\n"
  238. " Create ide0 entries that map onto ubd devices.\n\n"
  239. );
  240. static int parse_unit(char **ptr)
  241. {
  242. char *str = *ptr, *end;
  243. int n = -1;
  244. if(isdigit(*str)) {
  245. n = simple_strtoul(str, &end, 0);
  246. if(end == str)
  247. return(-1);
  248. *ptr = end;
  249. }
  250. else if (('a' <= *str) && (*str <= 'h')) {
  251. n = *str - 'a';
  252. str++;
  253. *ptr = str;
  254. }
  255. return(n);
  256. }
  257. static int ubd_setup_common(char *str, int *index_out)
  258. {
  259. struct ubd *dev;
  260. struct openflags flags = global_openflags;
  261. char *backing_file;
  262. int n, err, i;
  263. if(index_out) *index_out = -1;
  264. n = *str;
  265. if(n == '='){
  266. char *end;
  267. int major;
  268. str++;
  269. if(!strcmp(str, "mmap")){
  270. CHOOSE_MODE(printk("mmap not supported by the ubd "
  271. "driver in tt mode\n"),
  272. ubd_do_mmap = 1);
  273. return(0);
  274. }
  275. if(!strcmp(str, "sync")){
  276. global_openflags = of_sync(global_openflags);
  277. return(0);
  278. }
  279. major = simple_strtoul(str, &end, 0);
  280. if((*end != '\0') || (end == str)){
  281. printk(KERN_ERR
  282. "ubd_setup : didn't parse major number\n");
  283. return(1);
  284. }
  285. err = 1;
  286. spin_lock(&ubd_lock);
  287. if(fake_major != MAJOR_NR){
  288. printk(KERN_ERR "Can't assign a fake major twice\n");
  289. goto out1;
  290. }
  291. fake_major = major;
  292. printk(KERN_INFO "Setting extra ubd major number to %d\n",
  293. major);
  294. err = 0;
  295. out1:
  296. spin_unlock(&ubd_lock);
  297. return(err);
  298. }
  299. n = parse_unit(&str);
  300. if(n < 0){
  301. printk(KERN_ERR "ubd_setup : couldn't parse unit number "
  302. "'%s'\n", str);
  303. return(1);
  304. }
  305. if(n >= MAX_DEV){
  306. printk(KERN_ERR "ubd_setup : index %d out of range "
  307. "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
  308. return(1);
  309. }
  310. err = 1;
  311. spin_lock(&ubd_lock);
  312. dev = &ubd_dev[n];
  313. if(dev->file != NULL){
  314. printk(KERN_ERR "ubd_setup : device already configured\n");
  315. goto out;
  316. }
  317. if (index_out)
  318. *index_out = n;
  319. for (i = 0; i < 4; i++) {
  320. switch (*str) {
  321. case 'r':
  322. flags.w = 0;
  323. break;
  324. case 's':
  325. flags.s = 1;
  326. break;
  327. case 'd':
  328. dev->no_cow = 1;
  329. break;
  330. case '=':
  331. str++;
  332. goto break_loop;
  333. default:
  334. printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n");
  335. goto out;
  336. }
  337. str++;
  338. }
  339. if (*str == '=')
  340. printk(KERN_ERR "ubd_setup : Too many flags specified\n");
  341. else
  342. printk(KERN_ERR "ubd_setup : Expected '='\n");
  343. goto out;
  344. break_loop:
  345. err = 0;
  346. backing_file = strchr(str, ',');
  347. if (!backing_file) {
  348. backing_file = strchr(str, ':');
  349. }
  350. if(backing_file){
  351. if(dev->no_cow)
  352. printk(KERN_ERR "Can't specify both 'd' and a "
  353. "cow file\n");
  354. else {
  355. *backing_file = '\0';
  356. backing_file++;
  357. }
  358. }
  359. dev->file = str;
  360. dev->cow.file = backing_file;
  361. dev->boot_openflags = flags;
  362. out:
  363. spin_unlock(&ubd_lock);
  364. return(err);
  365. }
  366. static int ubd_setup(char *str)
  367. {
  368. ubd_setup_common(str, NULL);
  369. return(1);
  370. }
  371. __setup("ubd", ubd_setup);
  372. __uml_help(ubd_setup,
  373. "ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
  374. " This is used to associate a device with a file in the underlying\n"
  375. " filesystem. When specifying two filenames, the first one is the\n"
  376. " COW name and the second is the backing file name. As separator you can\n"
  377. " use either a ':' or a ',': the first one allows writing things like;\n"
  378. " ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
  379. " while with a ',' the shell would not expand the 2nd '~'.\n"
  380. " When using only one filename, UML will detect whether to thread it like\n"
  381. " a COW file or a backing file. To override this detection, add the 'd'\n"
  382. " flag:\n"
  383. " ubd0d=BackingFile\n"
  384. " Usually, there is a filesystem in the file, but \n"
  385. " that's not required. Swap devices containing swap files can be\n"
  386. " specified like this. Also, a file which doesn't contain a\n"
  387. " filesystem can have its contents read in the virtual \n"
  388. " machine by running 'dd' on the device. <n> must be in the range\n"
  389. " 0 to 7. Appending an 'r' to the number will cause that device\n"
  390. " to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
  391. " an 's' will cause data to be written to disk on the host immediately.\n\n"
  392. );
  393. static int udb_setup(char *str)
  394. {
  395. printk("udb%s specified on command line is almost certainly a ubd -> "
  396. "udb TYPO\n", str);
  397. return(1);
  398. }
  399. __setup("udb", udb_setup);
  400. __uml_help(udb_setup,
  401. "udb\n"
  402. " This option is here solely to catch ubd -> udb typos, which can be\n\n"
  403. " to impossible to catch visually unless you specifically look for\n\n"
  404. " them. The only result of any option starting with 'udb' is an error\n\n"
  405. " in the boot output.\n\n"
  406. );
  407. static int fakehd_set = 0;
  408. static int fakehd(char *str)
  409. {
  410. printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
  411. fakehd_set = 1;
  412. return 1;
  413. }
  414. __setup("fakehd", fakehd);
  415. __uml_help(fakehd,
  416. "fakehd\n"
  417. " Change the ubd device name to \"hd\".\n\n"
  418. );
  419. static void do_ubd_request(request_queue_t * q);
  420. /* Only changed by ubd_init, which is an initcall. */
  421. int thread_fd = -1;
  422. /* Changed by ubd_handler, which is serialized because interrupts only
  423. * happen on CPU 0.
  424. */
  425. int intr_count = 0;
  426. /* call ubd_finish if you need to serialize */
  427. static void __ubd_finish(struct request *req, int error)
  428. {
  429. int nsect;
  430. if(error){
  431. end_request(req, 0);
  432. return;
  433. }
  434. nsect = req->current_nr_sectors;
  435. req->sector += nsect;
  436. req->buffer += nsect << 9;
  437. req->errors = 0;
  438. req->nr_sectors -= nsect;
  439. req->current_nr_sectors = 0;
  440. end_request(req, 1);
  441. }
  442. static inline void ubd_finish(struct request *req, int error)
  443. {
  444. spin_lock(&ubd_io_lock);
  445. __ubd_finish(req, error);
  446. spin_unlock(&ubd_io_lock);
  447. }
  448. /* Called without ubd_io_lock held */
  449. static void ubd_handler(void)
  450. {
  451. struct io_thread_req req;
  452. struct request *rq = elv_next_request(ubd_queue);
  453. int n, err;
  454. do_ubd = NULL;
  455. intr_count++;
  456. n = os_read_file(thread_fd, &req, sizeof(req));
  457. if(n != sizeof(req)){
  458. printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
  459. "err = %d\n", os_getpid(), -n);
  460. spin_lock(&ubd_io_lock);
  461. end_request(rq, 0);
  462. spin_unlock(&ubd_io_lock);
  463. return;
  464. }
  465. if((req.op != UBD_MMAP) &&
  466. ((req.offset != ((__u64) (rq->sector)) << 9) ||
  467. (req.length != (rq->current_nr_sectors) << 9)))
  468. panic("I/O op mismatch");
  469. if(req.map_fd != -1){
  470. err = physmem_subst_mapping(req.buffer, req.map_fd,
  471. req.map_offset, 1);
  472. if(err)
  473. printk("ubd_handler - physmem_subst_mapping failed, "
  474. "err = %d\n", -err);
  475. }
  476. ubd_finish(rq, req.error);
  477. reactivate_fd(thread_fd, UBD_IRQ);
  478. do_ubd_request(ubd_queue);
  479. }
  480. static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
  481. {
  482. ubd_handler();
  483. return(IRQ_HANDLED);
  484. }
  485. /* Only changed by ubd_init, which is an initcall. */
  486. static int io_pid = -1;
  487. void kill_io_thread(void)
  488. {
  489. if(io_pid != -1)
  490. os_kill_process(io_pid, 1);
  491. }
  492. __uml_exitcall(kill_io_thread);
  493. static int ubd_file_size(struct ubd *dev, __u64 *size_out)
  494. {
  495. char *file;
  496. file = dev->cow.file ? dev->cow.file : dev->file;
  497. return(os_file_size(file, size_out));
  498. }
  499. static void ubd_close(struct ubd *dev)
  500. {
  501. if(ubd_do_mmap)
  502. physmem_forget_descriptor(dev->fd);
  503. os_close_file(dev->fd);
  504. if(dev->cow.file == NULL)
  505. return;
  506. if(ubd_do_mmap)
  507. physmem_forget_descriptor(dev->cow.fd);
  508. os_close_file(dev->cow.fd);
  509. vfree(dev->cow.bitmap);
  510. dev->cow.bitmap = NULL;
  511. }
  512. static int ubd_open_dev(struct ubd *dev)
  513. {
  514. struct openflags flags;
  515. char **back_ptr;
  516. int err, create_cow, *create_ptr;
  517. dev->openflags = dev->boot_openflags;
  518. create_cow = 0;
  519. create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
  520. back_ptr = dev->no_cow ? NULL : &dev->cow.file;
  521. dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
  522. &dev->cow.bitmap_offset, &dev->cow.bitmap_len,
  523. &dev->cow.data_offset, create_ptr);
  524. if((dev->fd == -ENOENT) && create_cow){
  525. dev->fd = create_cow_file(dev->file, dev->cow.file,
  526. dev->openflags, 1 << 9, PAGE_SIZE,
  527. &dev->cow.bitmap_offset,
  528. &dev->cow.bitmap_len,
  529. &dev->cow.data_offset);
  530. if(dev->fd >= 0){
  531. printk(KERN_INFO "Creating \"%s\" as COW file for "
  532. "\"%s\"\n", dev->file, dev->cow.file);
  533. }
  534. }
  535. if(dev->fd < 0){
  536. printk("Failed to open '%s', errno = %d\n", dev->file,
  537. -dev->fd);
  538. return(dev->fd);
  539. }
  540. if(dev->cow.file != NULL){
  541. err = -ENOMEM;
  542. dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
  543. if(dev->cow.bitmap == NULL){
  544. printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
  545. goto error;
  546. }
  547. flush_tlb_kernel_vm();
  548. err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
  549. dev->cow.bitmap_offset,
  550. dev->cow.bitmap_len);
  551. if(err < 0)
  552. goto error;
  553. flags = dev->openflags;
  554. flags.w = 0;
  555. err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL,
  556. NULL, NULL);
  557. if(err < 0) goto error;
  558. dev->cow.fd = err;
  559. }
  560. return(0);
  561. error:
  562. os_close_file(dev->fd);
  563. return(err);
  564. }
  565. static int ubd_new_disk(int major, u64 size, int unit,
  566. struct gendisk **disk_out)
  567. {
  568. struct gendisk *disk;
  569. char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
  570. int err;
  571. disk = alloc_disk(1 << UBD_SHIFT);
  572. if(disk == NULL)
  573. return(-ENOMEM);
  574. disk->major = major;
  575. disk->first_minor = unit << UBD_SHIFT;
  576. disk->fops = &ubd_blops;
  577. set_capacity(disk, size / 512);
  578. if(major == MAJOR_NR){
  579. sprintf(disk->disk_name, "ubd%c", 'a' + unit);
  580. sprintf(disk->devfs_name, "ubd/disc%d", unit);
  581. sprintf(from, "ubd/%d", unit);
  582. sprintf(to, "disc%d/disc", unit);
  583. err = devfs_mk_symlink(from, to);
  584. if(err)
  585. printk("ubd_new_disk failed to make link from %s to "
  586. "%s, error = %d\n", from, to, err);
  587. }
  588. else {
  589. sprintf(disk->disk_name, "ubd_fake%d", unit);
  590. sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
  591. }
  592. /* sysfs register (not for ide fake devices) */
  593. if (major == MAJOR_NR) {
  594. ubd_dev[unit].pdev.id = unit;
  595. ubd_dev[unit].pdev.name = DRIVER_NAME;
  596. platform_device_register(&ubd_dev[unit].pdev);
  597. disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
  598. }
  599. disk->private_data = &ubd_dev[unit];
  600. disk->queue = ubd_queue;
  601. add_disk(disk);
  602. *disk_out = disk;
  603. return 0;
  604. }
  605. #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
  606. static int ubd_add(int n)
  607. {
  608. struct ubd *dev = &ubd_dev[n];
  609. int err;
  610. if(dev->file == NULL)
  611. return(-ENODEV);
  612. if (ubd_open_dev(dev))
  613. return(-ENODEV);
  614. err = ubd_file_size(dev, &dev->size);
  615. if(err < 0)
  616. return(err);
  617. dev->size = ROUND_BLOCK(dev->size);
  618. err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
  619. if(err)
  620. return(err);
  621. if(fake_major != MAJOR_NR)
  622. ubd_new_disk(fake_major, dev->size, n,
  623. &fake_gendisk[n]);
  624. /* perhaps this should also be under the "if (fake_major)" above */
  625. /* using the fake_disk->disk_name and also the fakehd_set name */
  626. if (fake_ide)
  627. make_ide_entries(ubd_gendisk[n]->disk_name);
  628. ubd_close(dev);
  629. return 0;
  630. }
  631. static int ubd_config(char *str)
  632. {
  633. int n, err;
  634. str = uml_strdup(str);
  635. if(str == NULL){
  636. printk(KERN_ERR "ubd_config failed to strdup string\n");
  637. return(1);
  638. }
  639. err = ubd_setup_common(str, &n);
  640. if(err){
  641. kfree(str);
  642. return(-1);
  643. }
  644. if(n == -1) return(0);
  645. spin_lock(&ubd_lock);
  646. err = ubd_add(n);
  647. if(err)
  648. ubd_dev[n].file = NULL;
  649. spin_unlock(&ubd_lock);
  650. return(err);
  651. }
  652. static int ubd_get_config(char *name, char *str, int size, char **error_out)
  653. {
  654. struct ubd *dev;
  655. int n, len = 0;
  656. n = parse_unit(&name);
  657. if((n >= MAX_DEV) || (n < 0)){
  658. *error_out = "ubd_get_config : device number out of range";
  659. return(-1);
  660. }
  661. dev = &ubd_dev[n];
  662. spin_lock(&ubd_lock);
  663. if(dev->file == NULL){
  664. CONFIG_CHUNK(str, size, len, "", 1);
  665. goto out;
  666. }
  667. CONFIG_CHUNK(str, size, len, dev->file, 0);
  668. if(dev->cow.file != NULL){
  669. CONFIG_CHUNK(str, size, len, ",", 0);
  670. CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
  671. }
  672. else CONFIG_CHUNK(str, size, len, "", 1);
  673. out:
  674. spin_unlock(&ubd_lock);
  675. return(len);
  676. }
  677. static int ubd_remove(char *str)
  678. {
  679. struct ubd *dev;
  680. int n, err = -ENODEV;
  681. n = parse_unit(&str);
  682. if((n < 0) || (n >= MAX_DEV))
  683. return(err);
  684. dev = &ubd_dev[n];
  685. if(dev->count > 0)
  686. return(-EBUSY); /* you cannot remove a open disk */
  687. err = 0;
  688. spin_lock(&ubd_lock);
  689. if(ubd_gendisk[n] == NULL)
  690. goto out;
  691. del_gendisk(ubd_gendisk[n]);
  692. put_disk(ubd_gendisk[n]);
  693. ubd_gendisk[n] = NULL;
  694. if(fake_gendisk[n] != NULL){
  695. del_gendisk(fake_gendisk[n]);
  696. put_disk(fake_gendisk[n]);
  697. fake_gendisk[n] = NULL;
  698. }
  699. platform_device_unregister(&dev->pdev);
  700. *dev = ((struct ubd) DEFAULT_UBD);
  701. err = 0;
  702. out:
  703. spin_unlock(&ubd_lock);
  704. return(err);
  705. }
  706. static struct mc_device ubd_mc = {
  707. .name = "ubd",
  708. .config = ubd_config,
  709. .get_config = ubd_get_config,
  710. .remove = ubd_remove,
  711. };
  712. static int ubd_mc_init(void)
  713. {
  714. mconsole_register_dev(&ubd_mc);
  715. return 0;
  716. }
  717. __initcall(ubd_mc_init);
  718. static struct device_driver ubd_driver = {
  719. .name = DRIVER_NAME,
  720. .bus = &platform_bus_type,
  721. };
  722. int ubd_init(void)
  723. {
  724. int i;
  725. devfs_mk_dir("ubd");
  726. if (register_blkdev(MAJOR_NR, "ubd"))
  727. return -1;
  728. ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
  729. if (!ubd_queue) {
  730. unregister_blkdev(MAJOR_NR, "ubd");
  731. return -1;
  732. }
  733. if (fake_major != MAJOR_NR) {
  734. char name[sizeof("ubd_nnn\0")];
  735. snprintf(name, sizeof(name), "ubd_%d", fake_major);
  736. devfs_mk_dir(name);
  737. if (register_blkdev(fake_major, "ubd"))
  738. return -1;
  739. }
  740. driver_register(&ubd_driver);
  741. for (i = 0; i < MAX_DEV; i++)
  742. ubd_add(i);
  743. return 0;
  744. }
  745. late_initcall(ubd_init);
  746. int ubd_driver_init(void){
  747. unsigned long stack;
  748. int err;
  749. /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
  750. if(global_openflags.s){
  751. printk(KERN_INFO "ubd: Synchronous mode\n");
  752. /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
  753. * enough. So use anyway the io thread. */
  754. }
  755. stack = alloc_stack(0, 0);
  756. io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
  757. &thread_fd);
  758. if(io_pid < 0){
  759. printk(KERN_ERR
  760. "ubd : Failed to start I/O thread (errno = %d) - "
  761. "falling back to synchronous I/O\n", -io_pid);
  762. io_pid = -1;
  763. return(0);
  764. }
  765. err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
  766. SA_INTERRUPT, "ubd", ubd_dev);
  767. if(err != 0)
  768. printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
  769. return(err);
  770. }
  771. device_initcall(ubd_driver_init);
  772. static int ubd_open(struct inode *inode, struct file *filp)
  773. {
  774. struct gendisk *disk = inode->i_bdev->bd_disk;
  775. struct ubd *dev = disk->private_data;
  776. int err = 0;
  777. if(dev->count == 0){
  778. err = ubd_open_dev(dev);
  779. if(err){
  780. printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
  781. disk->disk_name, dev->file, -err);
  782. goto out;
  783. }
  784. }
  785. dev->count++;
  786. if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
  787. if(--dev->count == 0) ubd_close(dev);
  788. err = -EROFS;
  789. }
  790. out:
  791. return(err);
  792. }
  793. static int ubd_release(struct inode * inode, struct file * file)
  794. {
  795. struct gendisk *disk = inode->i_bdev->bd_disk;
  796. struct ubd *dev = disk->private_data;
  797. if(--dev->count == 0)
  798. ubd_close(dev);
  799. return(0);
  800. }
  801. static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
  802. __u64 *cow_offset, unsigned long *bitmap,
  803. __u64 bitmap_offset, unsigned long *bitmap_words,
  804. __u64 bitmap_len)
  805. {
  806. __u64 sector = io_offset >> 9;
  807. int i, update_bitmap = 0;
  808. for(i = 0; i < length >> 9; i++){
  809. if(cow_mask != NULL)
  810. ubd_set_bit(i, (unsigned char *) cow_mask);
  811. if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
  812. continue;
  813. update_bitmap = 1;
  814. ubd_set_bit(sector + i, (unsigned char *) bitmap);
  815. }
  816. if(!update_bitmap)
  817. return;
  818. *cow_offset = sector / (sizeof(unsigned long) * 8);
  819. /* This takes care of the case where we're exactly at the end of the
  820. * device, and *cow_offset + 1 is off the end. So, just back it up
  821. * by one word. Thanks to Lynn Kerby for the fix and James McMechan
  822. * for the original diagnosis.
  823. */
  824. if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
  825. sizeof(unsigned long) - 1))
  826. (*cow_offset)--;
  827. bitmap_words[0] = bitmap[*cow_offset];
  828. bitmap_words[1] = bitmap[*cow_offset + 1];
  829. *cow_offset *= sizeof(unsigned long);
  830. *cow_offset += bitmap_offset;
  831. }
  832. static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
  833. __u64 bitmap_offset, __u64 bitmap_len)
  834. {
  835. __u64 sector = req->offset >> 9;
  836. int i;
  837. if(req->length > (sizeof(req->sector_mask) * 8) << 9)
  838. panic("Operation too long");
  839. if(req->op == UBD_READ) {
  840. for(i = 0; i < req->length >> 9; i++){
  841. if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
  842. ubd_set_bit(i, (unsigned char *)
  843. &req->sector_mask);
  844. }
  845. }
  846. else cowify_bitmap(req->offset, req->length, &req->sector_mask,
  847. &req->cow_offset, bitmap, bitmap_offset,
  848. req->bitmap_words, bitmap_len);
  849. }
  850. static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset)
  851. {
  852. __u64 sector;
  853. unsigned char *bitmap;
  854. int bit, i;
  855. /* mmap must have been requested on the command line */
  856. if(!ubd_do_mmap)
  857. return(-1);
  858. /* The buffer must be page aligned */
  859. if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0)
  860. return(-1);
  861. /* The request must be a page long */
  862. if((req->current_nr_sectors << 9) != PAGE_SIZE)
  863. return(-1);
  864. if(dev->cow.file == NULL)
  865. return(dev->fd);
  866. sector = offset >> 9;
  867. bitmap = (unsigned char *) dev->cow.bitmap;
  868. bit = ubd_test_bit(sector, bitmap);
  869. for(i = 1; i < req->current_nr_sectors; i++){
  870. if(ubd_test_bit(sector + i, bitmap) != bit)
  871. return(-1);
  872. }
  873. if(bit || (rq_data_dir(req) == WRITE))
  874. offset += dev->cow.data_offset;
  875. /* The data on disk must be page aligned */
  876. if((offset % UBD_MMAP_BLOCK_SIZE) != 0)
  877. return(-1);
  878. return(bit ? dev->fd : dev->cow.fd);
  879. }
  880. static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset,
  881. struct request *req,
  882. struct io_thread_req *io_req)
  883. {
  884. int err;
  885. if(rq_data_dir(req) == WRITE){
  886. /* Writes are almost no-ops since the new data is already in the
  887. * host page cache
  888. */
  889. dev->map_writes++;
  890. if(dev->cow.file != NULL)
  891. cowify_bitmap(io_req->offset, io_req->length,
  892. &io_req->sector_mask, &io_req->cow_offset,
  893. dev->cow.bitmap, dev->cow.bitmap_offset,
  894. io_req->bitmap_words,
  895. dev->cow.bitmap_len);
  896. }
  897. else {
  898. int w;
  899. if((dev->cow.file != NULL) && (fd == dev->cow.fd))
  900. w = 0;
  901. else w = dev->openflags.w;
  902. if((dev->cow.file != NULL) && (fd == dev->fd))
  903. offset += dev->cow.data_offset;
  904. err = physmem_subst_mapping(req->buffer, fd, offset, w);
  905. if(err){
  906. printk("physmem_subst_mapping failed, err = %d\n",
  907. -err);
  908. return(1);
  909. }
  910. dev->map_reads++;
  911. }
  912. io_req->op = UBD_MMAP;
  913. io_req->buffer = req->buffer;
  914. return(0);
  915. }
  916. /* Called with ubd_io_lock held */
  917. static int prepare_request(struct request *req, struct io_thread_req *io_req)
  918. {
  919. struct gendisk *disk = req->rq_disk;
  920. struct ubd *dev = disk->private_data;
  921. __u64 offset;
  922. int len, fd;
  923. if(req->rq_status == RQ_INACTIVE) return(1);
  924. if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
  925. printk("Write attempted on readonly ubd device %s\n",
  926. disk->disk_name);
  927. end_request(req, 0);
  928. return(1);
  929. }
  930. offset = ((__u64) req->sector) << 9;
  931. len = req->current_nr_sectors << 9;
  932. io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
  933. io_req->fds[1] = dev->fd;
  934. io_req->map_fd = -1;
  935. io_req->cow_offset = -1;
  936. io_req->offset = offset;
  937. io_req->length = len;
  938. io_req->error = 0;
  939. io_req->sector_mask = 0;
  940. fd = mmap_fd(req, dev, io_req->offset);
  941. if(fd > 0){
  942. /* If mmapping is otherwise OK, but the first access to the
  943. * page is a write, then it's not mapped in yet. So we have
  944. * to write the data to disk first, then we can map the disk
  945. * page in and continue normally from there.
  946. */
  947. if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){
  948. io_req->map_fd = dev->fd;
  949. io_req->map_offset = io_req->offset +
  950. dev->cow.data_offset;
  951. dev->write_maps++;
  952. }
  953. else return(prepare_mmap_request(dev, fd, io_req->offset, req,
  954. io_req));
  955. }
  956. if(rq_data_dir(req) == READ)
  957. dev->nomap_reads++;
  958. else dev->nomap_writes++;
  959. io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
  960. io_req->offsets[0] = 0;
  961. io_req->offsets[1] = dev->cow.data_offset;
  962. io_req->buffer = req->buffer;
  963. io_req->sectorsize = 1 << 9;
  964. if(dev->cow.file != NULL)
  965. cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
  966. dev->cow.bitmap_len);
  967. return(0);
  968. }
  969. /* Called with ubd_io_lock held */
  970. static void do_ubd_request(request_queue_t *q)
  971. {
  972. struct io_thread_req io_req;
  973. struct request *req;
  974. int err, n;
  975. if(thread_fd == -1){
  976. while((req = elv_next_request(q)) != NULL){
  977. err = prepare_request(req, &io_req);
  978. if(!err){
  979. do_io(&io_req);
  980. __ubd_finish(req, io_req.error);
  981. }
  982. }
  983. }
  984. else {
  985. if(do_ubd || (req = elv_next_request(q)) == NULL)
  986. return;
  987. err = prepare_request(req, &io_req);
  988. if(!err){
  989. do_ubd = ubd_handler;
  990. n = os_write_file(thread_fd, (char *) &io_req,
  991. sizeof(io_req));
  992. if(n != sizeof(io_req))
  993. printk("write to io thread failed, "
  994. "errno = %d\n", -n);
  995. }
  996. }
  997. }
  998. static int ubd_ioctl(struct inode * inode, struct file * file,
  999. unsigned int cmd, unsigned long arg)
  1000. {
  1001. struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
  1002. struct ubd *dev = inode->i_bdev->bd_disk->private_data;
  1003. struct hd_driveid ubd_id = {
  1004. .cyls = 0,
  1005. .heads = 128,
  1006. .sectors = 32,
  1007. };
  1008. switch (cmd) {
  1009. struct hd_geometry g;
  1010. struct cdrom_volctrl volume;
  1011. case HDIO_GETGEO:
  1012. if(!loc) return(-EINVAL);
  1013. g.heads = 128;
  1014. g.sectors = 32;
  1015. g.cylinders = dev->size / (128 * 32 * 512);
  1016. g.start = get_start_sect(inode->i_bdev);
  1017. return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
  1018. case HDIO_GET_IDENTITY:
  1019. ubd_id.cyls = dev->size / (128 * 32 * 512);
  1020. if(copy_to_user((char __user *) arg, (char *) &ubd_id,
  1021. sizeof(ubd_id)))
  1022. return(-EFAULT);
  1023. return(0);
  1024. case CDROMVOLREAD:
  1025. if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
  1026. return(-EFAULT);
  1027. volume.channel0 = 255;
  1028. volume.channel1 = 255;
  1029. volume.channel2 = 255;
  1030. volume.channel3 = 255;
  1031. if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
  1032. return(-EFAULT);
  1033. return(0);
  1034. }
  1035. return(-EINVAL);
  1036. }
  1037. static int ubd_check_remapped(int fd, unsigned long address, int is_write,
  1038. __u64 offset)
  1039. {
  1040. __u64 bitmap_offset;
  1041. unsigned long new_bitmap[2];
  1042. int i, err, n;
  1043. /* If it's not a write access, we can't do anything about it */
  1044. if(!is_write)
  1045. return(0);
  1046. /* We have a write */
  1047. for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){
  1048. struct ubd *dev = &ubd_dev[i];
  1049. if((dev->fd != fd) && (dev->cow.fd != fd))
  1050. continue;
  1051. /* It's a write to a ubd device */
  1052. if(!dev->openflags.w){
  1053. /* It's a write access on a read-only device - probably
  1054. * shouldn't happen. If the kernel is trying to change
  1055. * something with no intention of writing it back out,
  1056. * then this message will clue us in that this needs
  1057. * fixing
  1058. */
  1059. printk("Write access to mapped page from readonly ubd "
  1060. "device %d\n", i);
  1061. return(0);
  1062. }
  1063. /* It's a write to a writeable ubd device - it must be COWed
  1064. * because, otherwise, the page would have been mapped in
  1065. * writeable
  1066. */
  1067. if(!dev->cow.file)
  1068. panic("Write fault on writeable non-COW ubd device %d",
  1069. i);
  1070. /* It should also be an access to the backing file since the
  1071. * COW pages should be mapped in read-write
  1072. */
  1073. if(fd == dev->fd)
  1074. panic("Write fault on a backing page of ubd "
  1075. "device %d\n", i);
  1076. /* So, we do the write, copying the backing data to the COW
  1077. * file...
  1078. */
  1079. err = os_seek_file(dev->fd, offset + dev->cow.data_offset);
  1080. if(err < 0)
  1081. panic("Couldn't seek to %lld in COW file of ubd "
  1082. "device %d, err = %d",
  1083. offset + dev->cow.data_offset, i, -err);
  1084. n = os_write_file(dev->fd, (void *) address, PAGE_SIZE);
  1085. if(n != PAGE_SIZE)
  1086. panic("Couldn't copy data to COW file of ubd "
  1087. "device %d, err = %d", i, -n);
  1088. /* ... updating the COW bitmap... */
  1089. cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset,
  1090. dev->cow.bitmap, dev->cow.bitmap_offset,
  1091. new_bitmap, dev->cow.bitmap_len);
  1092. err = os_seek_file(dev->fd, bitmap_offset);
  1093. if(err < 0)
  1094. panic("Couldn't seek to %lld in COW file of ubd "
  1095. "device %d, err = %d", bitmap_offset, i, -err);
  1096. n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap));
  1097. if(n != sizeof(new_bitmap))
  1098. panic("Couldn't update bitmap of ubd device %d, "
  1099. "err = %d", i, -n);
  1100. /* Maybe we can map the COW page in, and maybe we can't. If
  1101. * it is a pre-V3 COW file, we can't, since the alignment will
  1102. * be wrong. If it is a V3 or later COW file which has been
  1103. * moved to a system with a larger page size, then maybe we
  1104. * can't, depending on the exact location of the page.
  1105. */
  1106. offset += dev->cow.data_offset;
  1107. /* Remove the remapping, putting the original anonymous page
  1108. * back. If the COW file can be mapped in, that is done.
  1109. * Otherwise, the COW page is read in.
  1110. */
  1111. if(!physmem_remove_mapping((void *) address))
  1112. panic("Address 0x%lx not remapped by ubd device %d",
  1113. address, i);
  1114. if((offset % UBD_MMAP_BLOCK_SIZE) == 0)
  1115. physmem_subst_mapping((void *) address, dev->fd,
  1116. offset, 1);
  1117. else {
  1118. err = os_seek_file(dev->fd, offset);
  1119. if(err < 0)
  1120. panic("Couldn't seek to %lld in COW file of "
  1121. "ubd device %d, err = %d", offset, i,
  1122. -err);
  1123. n = os_read_file(dev->fd, (void *) address, PAGE_SIZE);
  1124. if(n != PAGE_SIZE)
  1125. panic("Failed to read page from offset %llx of "
  1126. "COW file of ubd device %d, err = %d",
  1127. offset, i, -n);
  1128. }
  1129. return(1);
  1130. }
  1131. /* It's not a write on a ubd device */
  1132. return(0);
  1133. }
  1134. static struct remapper ubd_remapper = {
  1135. .list = LIST_HEAD_INIT(ubd_remapper.list),
  1136. .proc = ubd_check_remapped,
  1137. };
  1138. static int ubd_remapper_setup(void)
  1139. {
  1140. if(ubd_do_mmap)
  1141. register_remapper(&ubd_remapper);
  1142. return(0);
  1143. }
  1144. __initcall(ubd_remapper_setup);
  1145. static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
  1146. {
  1147. struct uml_stat buf1, buf2;
  1148. int err;
  1149. if(from_cmdline == NULL) return(1);
  1150. if(!strcmp(from_cmdline, from_cow)) return(1);
  1151. err = os_stat_file(from_cmdline, &buf1);
  1152. if(err < 0){
  1153. printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
  1154. return(1);
  1155. }
  1156. err = os_stat_file(from_cow, &buf2);
  1157. if(err < 0){
  1158. printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
  1159. return(1);
  1160. }
  1161. if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
  1162. return(1);
  1163. printk("Backing file mismatch - \"%s\" requested,\n"
  1164. "\"%s\" specified in COW header of \"%s\"\n",
  1165. from_cmdline, from_cow, cow);
  1166. return(0);
  1167. }
  1168. static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
  1169. {
  1170. unsigned long modtime;
  1171. long long actual;
  1172. int err;
  1173. err = os_file_modtime(file, &modtime);
  1174. if(err < 0){
  1175. printk("Failed to get modification time of backing file "
  1176. "\"%s\", err = %d\n", file, -err);
  1177. return(err);
  1178. }
  1179. err = os_file_size(file, &actual);
  1180. if(err < 0){
  1181. printk("Failed to get size of backing file \"%s\", "
  1182. "err = %d\n", file, -err);
  1183. return(err);
  1184. }
  1185. if(actual != size){
  1186. /*__u64 can be a long on AMD64 and with %lu GCC complains; so
  1187. * the typecast.*/
  1188. printk("Size mismatch (%llu vs %llu) of COW header vs backing "
  1189. "file\n", (unsigned long long) size, actual);
  1190. return(-EINVAL);
  1191. }
  1192. if(modtime != mtime){
  1193. printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
  1194. "file\n", mtime, modtime);
  1195. return(-EINVAL);
  1196. }
  1197. return(0);
  1198. }
  1199. int read_cow_bitmap(int fd, void *buf, int offset, int len)
  1200. {
  1201. int err;
  1202. err = os_seek_file(fd, offset);
  1203. if(err < 0)
  1204. return(err);
  1205. err = os_read_file(fd, buf, len);
  1206. if(err < 0)
  1207. return(err);
  1208. return(0);
  1209. }
  1210. int open_ubd_file(char *file, struct openflags *openflags,
  1211. char **backing_file_out, int *bitmap_offset_out,
  1212. unsigned long *bitmap_len_out, int *data_offset_out,
  1213. int *create_cow_out)
  1214. {
  1215. time_t mtime;
  1216. unsigned long long size;
  1217. __u32 version, align;
  1218. char *backing_file;
  1219. int fd, err, sectorsize, same, mode = 0644;
  1220. fd = os_open_file(file, *openflags, mode);
  1221. if(fd < 0){
  1222. if((fd == -ENOENT) && (create_cow_out != NULL))
  1223. *create_cow_out = 1;
  1224. if(!openflags->w ||
  1225. ((fd != -EROFS) && (fd != -EACCES))) return(fd);
  1226. openflags->w = 0;
  1227. fd = os_open_file(file, *openflags, mode);
  1228. if(fd < 0)
  1229. return(fd);
  1230. }
  1231. err = os_lock_file(fd, openflags->w);
  1232. if(err < 0){
  1233. printk("Failed to lock '%s', err = %d\n", file, -err);
  1234. goto out_close;
  1235. }
  1236. if(backing_file_out == NULL) return(fd);
  1237. err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
  1238. &size, &sectorsize, &align, bitmap_offset_out);
  1239. if(err && (*backing_file_out != NULL)){
  1240. printk("Failed to read COW header from COW file \"%s\", "
  1241. "errno = %d\n", file, -err);
  1242. goto out_close;
  1243. }
  1244. if(err) return(fd);
  1245. if(backing_file_out == NULL) return(fd);
  1246. same = same_backing_files(*backing_file_out, backing_file, file);
  1247. if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
  1248. printk("Switching backing file to '%s'\n", *backing_file_out);
  1249. err = write_cow_header(file, fd, *backing_file_out,
  1250. sectorsize, align, &size);
  1251. if(err){
  1252. printk("Switch failed, errno = %d\n", -err);
  1253. return(err);
  1254. }
  1255. }
  1256. else {
  1257. *backing_file_out = backing_file;
  1258. err = backing_file_mismatch(*backing_file_out, size, mtime);
  1259. if(err) goto out_close;
  1260. }
  1261. cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
  1262. bitmap_len_out, data_offset_out);
  1263. return(fd);
  1264. out_close:
  1265. os_close_file(fd);
  1266. return(err);
  1267. }
  1268. int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
  1269. int sectorsize, int alignment, int *bitmap_offset_out,
  1270. unsigned long *bitmap_len_out, int *data_offset_out)
  1271. {
  1272. int err, fd;
  1273. flags.c = 1;
  1274. fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
  1275. if(fd < 0){
  1276. err = fd;
  1277. printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
  1278. -err);
  1279. goto out;
  1280. }
  1281. err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
  1282. bitmap_offset_out, bitmap_len_out,
  1283. data_offset_out);
  1284. if(!err)
  1285. return(fd);
  1286. os_close_file(fd);
  1287. out:
  1288. return(err);
  1289. }
  1290. static int update_bitmap(struct io_thread_req *req)
  1291. {
  1292. int n;
  1293. if(req->cow_offset == -1)
  1294. return(0);
  1295. n = os_seek_file(req->fds[1], req->cow_offset);
  1296. if(n < 0){
  1297. printk("do_io - bitmap lseek failed : err = %d\n", -n);
  1298. return(1);
  1299. }
  1300. n = os_write_file(req->fds[1], &req->bitmap_words,
  1301. sizeof(req->bitmap_words));
  1302. if(n != sizeof(req->bitmap_words)){
  1303. printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
  1304. req->fds[1]);
  1305. return(1);
  1306. }
  1307. return(0);
  1308. }
  1309. void do_io(struct io_thread_req *req)
  1310. {
  1311. char *buf;
  1312. unsigned long len;
  1313. int n, nsectors, start, end, bit;
  1314. int err;
  1315. __u64 off;
  1316. if(req->op == UBD_MMAP){
  1317. /* Touch the page to force the host to do any necessary IO to
  1318. * get it into memory
  1319. */
  1320. n = *((volatile int *) req->buffer);
  1321. req->error = update_bitmap(req);
  1322. return;
  1323. }
  1324. nsectors = req->length / req->sectorsize;
  1325. start = 0;
  1326. do {
  1327. bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
  1328. end = start;
  1329. while((end < nsectors) &&
  1330. (ubd_test_bit(end, (unsigned char *)
  1331. &req->sector_mask) == bit))
  1332. end++;
  1333. off = req->offset + req->offsets[bit] +
  1334. start * req->sectorsize;
  1335. len = (end - start) * req->sectorsize;
  1336. buf = &req->buffer[start * req->sectorsize];
  1337. err = os_seek_file(req->fds[bit], off);
  1338. if(err < 0){
  1339. printk("do_io - lseek failed : err = %d\n", -err);
  1340. req->error = 1;
  1341. return;
  1342. }
  1343. if(req->op == UBD_READ){
  1344. n = 0;
  1345. do {
  1346. buf = &buf[n];
  1347. len -= n;
  1348. n = os_read_file(req->fds[bit], buf, len);
  1349. if (n < 0) {
  1350. printk("do_io - read failed, err = %d "
  1351. "fd = %d\n", -n, req->fds[bit]);
  1352. req->error = 1;
  1353. return;
  1354. }
  1355. } while((n < len) && (n != 0));
  1356. if (n < len) memset(&buf[n], 0, len - n);
  1357. }
  1358. else {
  1359. n = os_write_file(req->fds[bit], buf, len);
  1360. if(n != len){
  1361. printk("do_io - write failed err = %d "
  1362. "fd = %d\n", -n, req->fds[bit]);
  1363. req->error = 1;
  1364. return;
  1365. }
  1366. }
  1367. start = end;
  1368. } while(start < nsectors);
  1369. req->error = update_bitmap(req);
  1370. }
  1371. /* Changed in start_io_thread, which is serialized by being called only
  1372. * from ubd_init, which is an initcall.
  1373. */
  1374. int kernel_fd = -1;
  1375. /* Only changed by the io thread */
  1376. int io_count = 0;
  1377. int io_thread(void *arg)
  1378. {
  1379. struct io_thread_req req;
  1380. int n;
  1381. ignore_sigwinch_sig();
  1382. while(1){
  1383. n = os_read_file(kernel_fd, &req, sizeof(req));
  1384. if(n != sizeof(req)){
  1385. if(n < 0)
  1386. printk("io_thread - read failed, fd = %d, "
  1387. "err = %d\n", kernel_fd, -n);
  1388. else {
  1389. printk("io_thread - short read, fd = %d, "
  1390. "length = %d\n", kernel_fd, n);
  1391. }
  1392. continue;
  1393. }
  1394. io_count++;
  1395. do_io(&req);
  1396. n = os_write_file(kernel_fd, &req, sizeof(req));
  1397. if(n != sizeof(req))
  1398. printk("io_thread - write failed, fd = %d, err = %d\n",
  1399. kernel_fd, -n);
  1400. }
  1401. }
  1402. /*
  1403. * Overrides for Emacs so that we follow Linus's tabbing style.
  1404. * Emacs will notice this stuff at the end of the file and automatically
  1405. * adjust the settings for this buffer only. This must remain at the end
  1406. * of the file.
  1407. * ---------------------------------------------------------------------------
  1408. * Local variables:
  1409. * c-file-style: "linux"
  1410. * End:
  1411. */