dm-log-userspace-base.c 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706
  1. /*
  2. * Copyright (C) 2006-2009 Red Hat, Inc.
  3. *
  4. * This file is released under the LGPL.
  5. */
  #include <linux/bio.h>
  #include <linux/slab.h>
  #include <linux/jiffies.h>
  #include <linux/dm-dirty-log.h>
  #include <linux/device-mapper.h>
  #include <linux/dm-log-userspace.h>

  #include "dm-log-userspace-transfer.h"
  12. struct flush_entry {
  13. int type;
  14. region_t region;
  15. struct list_head list;
  16. };
  17. struct log_c {
  18. struct dm_target *ti;
  19. uint32_t region_size;
  20. region_t region_count;
  21. uint64_t luid;
  22. char uuid[DM_UUID_LEN];
  23. char *usr_argv_str;
  24. uint32_t usr_argc;
  25. /*
  26. * in_sync_hint gets set when doing is_remote_recovering. It
  27. * represents the first region that needs recovery. IOW, the
  28. * first zero bit of sync_bits. This can be useful for to limit
  29. * traffic for calls like is_remote_recovering and get_resync_work,
  30. * but be take care in its use for anything else.
  31. */
  32. uint64_t in_sync_hint;
  33. spinlock_t flush_lock;
  34. struct list_head flush_list; /* only for clear and mark requests */
  35. };
  36. static mempool_t *flush_entry_pool;
  37. static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
  38. {
  39. return kmalloc(sizeof(struct flush_entry), gfp_mask);
  40. }
  /*
   * mempool free callback: release an element obtained from
   * flush_entry_alloc().  pool_data is unused.
   */
  static void flush_entry_free(void *element, void *pool_data)
  {
      struct flush_entry *entry = element;

      kfree(entry);
  }
  45. static int userspace_do_request(struct log_c *lc, const char *uuid,
  46. int request_type, char *data, size_t data_size,
  47. char *rdata, size_t *rdata_size)
  48. {
  49. int r;
  50. /*
  51. * If the server isn't there, -ESRCH is returned,
  52. * and we must keep trying until the server is
  53. * restored.
  54. */
  55. retry:
  56. r = dm_consult_userspace(uuid, lc->luid, request_type, data,
  57. data_size, rdata, rdata_size);
  58. if (r != -ESRCH)
  59. return r;
  60. DMERR(" Userspace log server not found.");
  61. while (1) {
  62. set_current_state(TASK_INTERRUPTIBLE);
  63. schedule_timeout(2*HZ);
  64. DMWARN("Attempting to contact userspace log server...");
  65. r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
  66. lc->usr_argv_str,
  67. strlen(lc->usr_argv_str) + 1,
  68. NULL, NULL);
  69. if (!r)
  70. break;
  71. }
  72. DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
  73. r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
  74. 0, NULL, NULL);
  75. if (!r)
  76. goto retry;
  77. DMERR("Error trying to resume userspace log: %d", r);
  78. return -ESRCH;
  79. }
  80. static int build_constructor_string(struct dm_target *ti,
  81. unsigned argc, char **argv,
  82. char **ctr_str)
  83. {
  84. int i, str_size;
  85. char *str = NULL;
  86. *ctr_str = NULL;
  87. for (i = 0, str_size = 0; i < argc; i++)
  88. str_size += strlen(argv[i]) + 1; /* +1 for space between args */
  89. str_size += 20; /* Max number of chars in a printed u64 number */
  90. str = kzalloc(str_size, GFP_KERNEL);
  91. if (!str) {
  92. DMWARN("Unable to allocate memory for constructor string");
  93. return -ENOMEM;
  94. }
  95. str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
  96. for (i = 0; i < argc; i++)
  97. str_size += sprintf(str + str_size, " %s", argv[i]);
  98. *ctr_str = str;
  99. return str_size;
  100. }
  101. /*
  102. * userspace_ctr
  103. *
  104. * argv contains:
  105. * <UUID> <other args>
  106. * Where 'other args' is the userspace implementation specific log
  107. * arguments. An example might be:
  108. * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
  109. *
  110. * So, this module will strip off the <UUID> for identification purposes
  111. * when communicating with userspace about a log; but will pass on everything
  112. * else.
  113. */
  114. static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
  115. unsigned argc, char **argv)
  116. {
  117. int r = 0;
  118. int str_size;
  119. char *ctr_str = NULL;
  120. struct log_c *lc = NULL;
  121. uint64_t rdata;
  122. size_t rdata_size = sizeof(rdata);
  123. if (argc < 3) {
  124. DMWARN("Too few arguments to userspace dirty log");
  125. return -EINVAL;
  126. }
  127. lc = kmalloc(sizeof(*lc), GFP_KERNEL);
  128. if (!lc) {
  129. DMWARN("Unable to allocate userspace log context.");
  130. return -ENOMEM;
  131. }
  132. /* The ptr value is sufficient for local unique id */
  133. lc->luid = (unsigned long)lc;
  134. lc->ti = ti;
  135. if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
  136. DMWARN("UUID argument too long.");
  137. kfree(lc);
  138. return -EINVAL;
  139. }
  140. strncpy(lc->uuid, argv[0], DM_UUID_LEN);
  141. spin_lock_init(&lc->flush_lock);
  142. INIT_LIST_HEAD(&lc->flush_list);
  143. str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
  144. if (str_size < 0) {
  145. kfree(lc);
  146. return str_size;
  147. }
  148. /* Send table string */
  149. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
  150. ctr_str, str_size, NULL, NULL);
  151. if (r == -ESRCH) {
  152. DMERR("Userspace log server not found");
  153. goto out;
  154. }
  155. /* Since the region size does not change, get it now */
  156. rdata_size = sizeof(rdata);
  157. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
  158. NULL, 0, (char *)&rdata, &rdata_size);
  159. if (r) {
  160. DMERR("Failed to get region size of dirty log");
  161. goto out;
  162. }
  163. lc->region_size = (uint32_t)rdata;
  164. lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
  165. out:
  166. if (r) {
  167. kfree(lc);
  168. kfree(ctr_str);
  169. } else {
  170. lc->usr_argv_str = ctr_str;
  171. lc->usr_argc = argc;
  172. log->context = lc;
  173. }
  174. return r;
  175. }
  176. static void userspace_dtr(struct dm_dirty_log *log)
  177. {
  178. int r;
  179. struct log_c *lc = log->context;
  180. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
  181. NULL, 0,
  182. NULL, NULL);
  183. kfree(lc->usr_argv_str);
  184. kfree(lc);
  185. return;
  186. }
  187. static int userspace_presuspend(struct dm_dirty_log *log)
  188. {
  189. int r;
  190. struct log_c *lc = log->context;
  191. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
  192. NULL, 0,
  193. NULL, NULL);
  194. return r;
  195. }
  196. static int userspace_postsuspend(struct dm_dirty_log *log)
  197. {
  198. int r;
  199. struct log_c *lc = log->context;
  200. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
  201. NULL, 0,
  202. NULL, NULL);
  203. return r;
  204. }
  205. static int userspace_resume(struct dm_dirty_log *log)
  206. {
  207. int r;
  208. struct log_c *lc = log->context;
  209. lc->in_sync_hint = 0;
  210. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
  211. NULL, 0,
  212. NULL, NULL);
  213. return r;
  214. }
  215. static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
  216. {
  217. struct log_c *lc = log->context;
  218. return lc->region_size;
  219. }
  220. /*
  221. * userspace_is_clean
  222. *
  223. * Check whether a region is clean. If there is any sort of
  224. * failure when consulting the server, we return not clean.
  225. *
  226. * Returns: 1 if clean, 0 otherwise
  227. */
  228. static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
  229. {
  230. int r;
  231. uint64_t region64 = (uint64_t)region;
  232. int64_t is_clean;
  233. size_t rdata_size;
  234. struct log_c *lc = log->context;
  235. rdata_size = sizeof(is_clean);
  236. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
  237. (char *)&region64, sizeof(region64),
  238. (char *)&is_clean, &rdata_size);
  239. return (r) ? 0 : (int)is_clean;
  240. }
  241. /*
  242. * userspace_in_sync
  243. *
  244. * Check if the region is in-sync. If there is any sort
  245. * of failure when consulting the server, we assume that
  246. * the region is not in sync.
  247. *
  248. * If 'can_block' is set, return immediately
  249. *
  250. * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
  251. */
  252. static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
  253. int can_block)
  254. {
  255. int r;
  256. uint64_t region64 = region;
  257. int64_t in_sync;
  258. size_t rdata_size;
  259. struct log_c *lc = log->context;
  260. /*
  261. * We can never respond directly - even if in_sync_hint is
  262. * set. This is because another machine could see a device
  263. * failure and mark the region out-of-sync. If we don't go
  264. * to userspace to ask, we might think the region is in-sync
  265. * and allow a read to pick up data that is stale. (This is
  266. * very unlikely if a device actually fails; but it is very
  267. * likely if a connection to one device from one machine fails.)
  268. *
  269. * There still might be a problem if the mirror caches the region
  270. * state as in-sync... but then this call would not be made. So,
  271. * that is a mirror problem.
  272. */
  273. if (!can_block)
  274. return -EWOULDBLOCK;
  275. rdata_size = sizeof(in_sync);
  276. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
  277. (char *)&region64, sizeof(region64),
  278. (char *)&in_sync, &rdata_size);
  279. return (r) ? 0 : (int)in_sync;
  280. }
  281. /*
  282. * userspace_flush
  283. *
  284. * This function is ok to block.
  285. * The flush happens in two stages. First, it sends all
  286. * clear/mark requests that are on the list. Then it
  287. * tells the server to commit them. This gives the
  288. * server a chance to optimise the commit, instead of
  289. * doing it for every request.
  290. *
  291. * Additionally, we could implement another thread that
  292. * sends the requests up to the server - reducing the
  293. * load on flush. Then the flush would have less in
  294. * the list and be responsible for the finishing commit.
  295. *
  296. * Returns: 0 on success, < 0 on failure
  297. */
  298. static int userspace_flush(struct dm_dirty_log *log)
  299. {
  300. int r = 0;
  301. unsigned long flags;
  302. struct log_c *lc = log->context;
  303. LIST_HEAD(flush_list);
  304. struct flush_entry *fe, *tmp_fe;
  305. spin_lock_irqsave(&lc->flush_lock, flags);
  306. list_splice_init(&lc->flush_list, &flush_list);
  307. spin_unlock_irqrestore(&lc->flush_lock, flags);
  308. if (list_empty(&flush_list))
  309. return 0;
  310. /*
  311. * FIXME: Count up requests, group request types,
  312. * allocate memory to stick all requests in and
  313. * send to server in one go. Failing the allocation,
  314. * do it one by one.
  315. */
  316. list_for_each_entry(fe, &flush_list, list) {
  317. r = userspace_do_request(lc, lc->uuid, fe->type,
  318. (char *)&fe->region,
  319. sizeof(fe->region),
  320. NULL, NULL);
  321. if (r)
  322. goto fail;
  323. }
  324. r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
  325. NULL, 0, NULL, NULL);
  326. fail:
  327. /*
  328. * We can safely remove these entries, even if failure.
  329. * Calling code will receive an error and will know that
  330. * the log facility has failed.
  331. */
  332. list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
  333. list_del(&fe->list);
  334. mempool_free(fe, flush_entry_pool);
  335. }
  336. if (r)
  337. dm_table_event(lc->ti->table);
  338. return r;
  339. }
  340. /*
  341. * userspace_mark_region
  342. *
  343. * This function should avoid blocking unless absolutely required.
  344. * (Memory allocation is valid for blocking.)
  345. */
  346. static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
  347. {
  348. unsigned long flags;
  349. struct log_c *lc = log->context;
  350. struct flush_entry *fe;
  351. /* Wait for an allocation, but _never_ fail */
  352. fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
  353. BUG_ON(!fe);
  354. spin_lock_irqsave(&lc->flush_lock, flags);
  355. fe->type = DM_ULOG_MARK_REGION;
  356. fe->region = region;
  357. list_add(&fe->list, &lc->flush_list);
  358. spin_unlock_irqrestore(&lc->flush_lock, flags);
  359. return;
  360. }
  361. /*
  362. * userspace_clear_region
  363. *
  364. * This function must not block.
  365. * So, the alloc can't block. In the worst case, it is ok to
  366. * fail. It would simply mean we can't clear the region.
  367. * Does nothing to current sync context, but does mean
  368. * the region will be re-sync'ed on a reload of the mirror
  369. * even though it is in-sync.
  370. */
  371. static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
  372. {
  373. unsigned long flags;
  374. struct log_c *lc = log->context;
  375. struct flush_entry *fe;
  376. /*
  377. * If we fail to allocate, we skip the clearing of
  378. * the region. This doesn't hurt us in any way, except
  379. * to cause the region to be resync'ed when the
  380. * device is activated next time.
  381. */
  382. fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
  383. if (!fe) {
  384. DMERR("Failed to allocate memory to clear region.");
  385. return;
  386. }
  387. spin_lock_irqsave(&lc->flush_lock, flags);
  388. fe->type = DM_ULOG_CLEAR_REGION;
  389. fe->region = region;
  390. list_add(&fe->list, &lc->flush_list);
  391. spin_unlock_irqrestore(&lc->flush_lock, flags);
  392. return;
  393. }
  394. /*
  395. * userspace_get_resync_work
  396. *
  397. * Get a region that needs recovery. It is valid to return
  398. * an error for this function.
  399. *
  400. * Returns: 1 if region filled, 0 if no work, <0 on error
  401. */
  402. static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
  403. {
  404. int r;
  405. size_t rdata_size;
  406. struct log_c *lc = log->context;
  407. struct {
  408. int64_t i; /* 64-bit for mix arch compatibility */
  409. region_t r;
  410. } pkg;
  411. if (lc->in_sync_hint >= lc->region_count)
  412. return 0;
  413. rdata_size = sizeof(pkg);
  414. r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
  415. NULL, 0,
  416. (char *)&pkg, &rdata_size);
  417. *region = pkg.r;
  418. return (r) ? r : (int)pkg.i;
  419. }
  420. /*
  421. * userspace_set_region_sync
  422. *
  423. * Set the sync status of a given region. This function
  424. * must not fail.
  425. */
  426. static void userspace_set_region_sync(struct dm_dirty_log *log,
  427. region_t region, int in_sync)
  428. {
  429. int r;
  430. struct log_c *lc = log->context;
  431. struct {
  432. region_t r;
  433. int64_t i;
  434. } pkg;
  435. pkg.r = region;
  436. pkg.i = (int64_t)in_sync;
  437. r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
  438. (char *)&pkg, sizeof(pkg),
  439. NULL, NULL);
  440. /*
  441. * It would be nice to be able to report failures.
  442. * However, it is easy emough to detect and resolve.
  443. */
  444. return;
  445. }
  446. /*
  447. * userspace_get_sync_count
  448. *
  449. * If there is any sort of failure when consulting the server,
  450. * we assume that the sync count is zero.
  451. *
  452. * Returns: sync count on success, 0 on failure
  453. */
  454. static region_t userspace_get_sync_count(struct dm_dirty_log *log)
  455. {
  456. int r;
  457. size_t rdata_size;
  458. uint64_t sync_count;
  459. struct log_c *lc = log->context;
  460. rdata_size = sizeof(sync_count);
  461. r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
  462. NULL, 0,
  463. (char *)&sync_count, &rdata_size);
  464. if (r)
  465. return 0;
  466. if (sync_count >= lc->region_count)
  467. lc->in_sync_hint = lc->region_count;
  468. return (region_t)sync_count;
  469. }
  470. /*
  471. * userspace_status
  472. *
  473. * Returns: amount of space consumed
  474. */
  475. static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
  476. char *result, unsigned maxlen)
  477. {
  478. int r = 0;
  479. char *table_args;
  480. size_t sz = (size_t)maxlen;
  481. struct log_c *lc = log->context;
  482. switch (status_type) {
  483. case STATUSTYPE_INFO:
  484. r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
  485. NULL, 0,
  486. result, &sz);
  487. if (r) {
  488. sz = 0;
  489. DMEMIT("%s 1 COM_FAILURE", log->type->name);
  490. }
  491. break;
  492. case STATUSTYPE_TABLE:
  493. sz = 0;
  494. table_args = strchr(lc->usr_argv_str, ' ');
  495. BUG_ON(!table_args); /* There will always be a ' ' */
  496. table_args++;
  497. DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
  498. lc->uuid, table_args);
  499. break;
  500. }
  501. return (r) ? 0 : (int)sz;
  502. }
  503. /*
  504. * userspace_is_remote_recovering
  505. *
  506. * Returns: 1 if region recovering, 0 otherwise
  507. */
  508. static int userspace_is_remote_recovering(struct dm_dirty_log *log,
  509. region_t region)
  510. {
  511. int r;
  512. uint64_t region64 = region;
  513. struct log_c *lc = log->context;
  514. static unsigned long long limit;
  515. struct {
  516. int64_t is_recovering;
  517. uint64_t in_sync_hint;
  518. } pkg;
  519. size_t rdata_size = sizeof(pkg);
  520. /*
  521. * Once the mirror has been reported to be in-sync,
  522. * it will never again ask for recovery work. So,
  523. * we can safely say there is not a remote machine
  524. * recovering if the device is in-sync. (in_sync_hint
  525. * must be reset at resume time.)
  526. */
  527. if (region < lc->in_sync_hint)
  528. return 0;
  529. else if (jiffies < limit)
  530. return 1;
  531. limit = jiffies + (HZ / 4);
  532. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
  533. (char *)&region64, sizeof(region64),
  534. (char *)&pkg, &rdata_size);
  535. if (r)
  536. return 1;
  537. lc->in_sync_hint = pkg.in_sync_hint;
  538. return (int)pkg.is_recovering;
  539. }
  540. static struct dm_dirty_log_type _userspace_type = {
  541. .name = "userspace",
  542. .module = THIS_MODULE,
  543. .ctr = userspace_ctr,
  544. .dtr = userspace_dtr,
  545. .presuspend = userspace_presuspend,
  546. .postsuspend = userspace_postsuspend,
  547. .resume = userspace_resume,
  548. .get_region_size = userspace_get_region_size,
  549. .is_clean = userspace_is_clean,
  550. .in_sync = userspace_in_sync,
  551. .flush = userspace_flush,
  552. .mark_region = userspace_mark_region,
  553. .clear_region = userspace_clear_region,
  554. .get_resync_work = userspace_get_resync_work,
  555. .set_region_sync = userspace_set_region_sync,
  556. .get_sync_count = userspace_get_sync_count,
  557. .status = userspace_status,
  558. .is_remote_recovering = userspace_is_remote_recovering,
  559. };
  560. static int __init userspace_dirty_log_init(void)
  561. {
  562. int r = 0;
  563. flush_entry_pool = mempool_create(100, flush_entry_alloc,
  564. flush_entry_free, NULL);
  565. if (!flush_entry_pool) {
  566. DMWARN("Unable to create flush_entry_pool: No memory.");
  567. return -ENOMEM;
  568. }
  569. r = dm_ulog_tfr_init();
  570. if (r) {
  571. DMWARN("Unable to initialize userspace log communications");
  572. mempool_destroy(flush_entry_pool);
  573. return r;
  574. }
  575. r = dm_dirty_log_type_register(&_userspace_type);
  576. if (r) {
  577. DMWARN("Couldn't register userspace dirty log type");
  578. dm_ulog_tfr_exit();
  579. mempool_destroy(flush_entry_pool);
  580. return r;
  581. }
  582. DMINFO("version 1.0.0 loaded");
  583. return 0;
  584. }
  585. static void __exit userspace_dirty_log_exit(void)
  586. {
  587. dm_dirty_log_type_unregister(&_userspace_type);
  588. dm_ulog_tfr_exit();
  589. mempool_destroy(flush_entry_pool);
  590. DMINFO("version 1.0.0 unloaded");
  591. return;
  592. }
  593. module_init(userspace_dirty_log_init);
  594. module_exit(userspace_dirty_log_exit);
  595. MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
  596. MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
  597. MODULE_LICENSE("GPL");