dm-log-userspace-base.c 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705
  1. /*
  2. * Copyright (C) 2006-2009 Red Hat, Inc.
  3. *
  4. * This file is released under the LGPL.
  5. */
  6. #include <linux/bio.h>
  7. #include <linux/dm-dirty-log.h>
  8. #include <linux/device-mapper.h>
  9. #include <linux/dm-log-userspace.h>
  10. #include "dm-log-userspace-transfer.h"
  11. struct flush_entry {
  12. int type;
  13. region_t region;
  14. struct list_head list;
  15. };
  16. struct log_c {
  17. struct dm_target *ti;
  18. uint32_t region_size;
  19. region_t region_count;
  20. uint64_t luid;
  21. char uuid[DM_UUID_LEN];
  22. char *usr_argv_str;
  23. uint32_t usr_argc;
  24. /*
  25. * in_sync_hint gets set when doing is_remote_recovering. It
  26. * represents the first region that needs recovery. IOW, the
  27. * first zero bit of sync_bits. This can be useful for to limit
  28. * traffic for calls like is_remote_recovering and get_resync_work,
  29. * but be take care in its use for anything else.
  30. */
  31. uint64_t in_sync_hint;
  32. spinlock_t flush_lock;
  33. struct list_head flush_list; /* only for clear and mark requests */
  34. };
  35. static mempool_t *flush_entry_pool;
  36. static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
  37. {
  38. return kmalloc(sizeof(struct flush_entry), gfp_mask);
  39. }
  /*
   * mempool release callback: free an element obtained from
   * flush_entry_alloc().  pool_data is unused.
   */
  static void flush_entry_free(void *element, void *pool_data)
  {
      struct flush_entry *entry = element;

      kfree(entry);
  }
  44. static int userspace_do_request(struct log_c *lc, const char *uuid,
  45. int request_type, char *data, size_t data_size,
  46. char *rdata, size_t *rdata_size)
  47. {
  48. int r;
  49. /*
  50. * If the server isn't there, -ESRCH is returned,
  51. * and we must keep trying until the server is
  52. * restored.
  53. */
  54. retry:
  55. r = dm_consult_userspace(uuid, lc->luid, request_type, data,
  56. data_size, rdata, rdata_size);
  57. if (r != -ESRCH)
  58. return r;
  59. DMERR(" Userspace log server not found.");
  60. while (1) {
  61. set_current_state(TASK_INTERRUPTIBLE);
  62. schedule_timeout(2*HZ);
  63. DMWARN("Attempting to contact userspace log server...");
  64. r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
  65. lc->usr_argv_str,
  66. strlen(lc->usr_argv_str) + 1,
  67. NULL, NULL);
  68. if (!r)
  69. break;
  70. }
  71. DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
  72. r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
  73. 0, NULL, NULL);
  74. if (!r)
  75. goto retry;
  76. DMERR("Error trying to resume userspace log: %d", r);
  77. return -ESRCH;
  78. }
  79. static int build_constructor_string(struct dm_target *ti,
  80. unsigned argc, char **argv,
  81. char **ctr_str)
  82. {
  83. int i, str_size;
  84. char *str = NULL;
  85. *ctr_str = NULL;
  86. for (i = 0, str_size = 0; i < argc; i++)
  87. str_size += strlen(argv[i]) + 1; /* +1 for space between args */
  88. str_size += 20; /* Max number of chars in a printed u64 number */
  89. str = kzalloc(str_size, GFP_KERNEL);
  90. if (!str) {
  91. DMWARN("Unable to allocate memory for constructor string");
  92. return -ENOMEM;
  93. }
  94. str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
  95. for (i = 0; i < argc; i++)
  96. str_size += sprintf(str + str_size, " %s", argv[i]);
  97. *ctr_str = str;
  98. return str_size;
  99. }
  100. /*
  101. * userspace_ctr
  102. *
  103. * argv contains:
  104. * <UUID> <other args>
  105. * Where 'other args' is the userspace implementation specific log
  106. * arguments. An example might be:
  107. * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
  108. *
  109. * So, this module will strip off the <UUID> for identification purposes
  110. * when communicating with userspace about a log; but will pass on everything
  111. * else.
  112. */
  113. static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
  114. unsigned argc, char **argv)
  115. {
  116. int r = 0;
  117. int str_size;
  118. char *ctr_str = NULL;
  119. struct log_c *lc = NULL;
  120. uint64_t rdata;
  121. size_t rdata_size = sizeof(rdata);
  122. if (argc < 3) {
  123. DMWARN("Too few arguments to userspace dirty log");
  124. return -EINVAL;
  125. }
  126. lc = kmalloc(sizeof(*lc), GFP_KERNEL);
  127. if (!lc) {
  128. DMWARN("Unable to allocate userspace log context.");
  129. return -ENOMEM;
  130. }
  131. /* The ptr value is sufficient for local unique id */
  132. lc->luid = (uint64_t)lc;
  133. lc->ti = ti;
  134. if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
  135. DMWARN("UUID argument too long.");
  136. kfree(lc);
  137. return -EINVAL;
  138. }
  139. strncpy(lc->uuid, argv[0], DM_UUID_LEN);
  140. spin_lock_init(&lc->flush_lock);
  141. INIT_LIST_HEAD(&lc->flush_list);
  142. str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
  143. if (str_size < 0) {
  144. kfree(lc);
  145. return str_size;
  146. }
  147. /* Send table string */
  148. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
  149. ctr_str, str_size, NULL, NULL);
  150. if (r == -ESRCH) {
  151. DMERR("Userspace log server not found");
  152. goto out;
  153. }
  154. /* Since the region size does not change, get it now */
  155. rdata_size = sizeof(rdata);
  156. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
  157. NULL, 0, (char *)&rdata, &rdata_size);
  158. if (r) {
  159. DMERR("Failed to get region size of dirty log");
  160. goto out;
  161. }
  162. lc->region_size = (uint32_t)rdata;
  163. lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
  164. out:
  165. if (r) {
  166. kfree(lc);
  167. kfree(ctr_str);
  168. } else {
  169. lc->usr_argv_str = ctr_str;
  170. lc->usr_argc = argc;
  171. log->context = lc;
  172. }
  173. return r;
  174. }
  175. static void userspace_dtr(struct dm_dirty_log *log)
  176. {
  177. int r;
  178. struct log_c *lc = log->context;
  179. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
  180. NULL, 0,
  181. NULL, NULL);
  182. kfree(lc->usr_argv_str);
  183. kfree(lc);
  184. return;
  185. }
  186. static int userspace_presuspend(struct dm_dirty_log *log)
  187. {
  188. int r;
  189. struct log_c *lc = log->context;
  190. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
  191. NULL, 0,
  192. NULL, NULL);
  193. return r;
  194. }
  195. static int userspace_postsuspend(struct dm_dirty_log *log)
  196. {
  197. int r;
  198. struct log_c *lc = log->context;
  199. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
  200. NULL, 0,
  201. NULL, NULL);
  202. return r;
  203. }
  204. static int userspace_resume(struct dm_dirty_log *log)
  205. {
  206. int r;
  207. struct log_c *lc = log->context;
  208. lc->in_sync_hint = 0;
  209. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
  210. NULL, 0,
  211. NULL, NULL);
  212. return r;
  213. }
  214. static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
  215. {
  216. struct log_c *lc = log->context;
  217. return lc->region_size;
  218. }
  219. /*
  220. * userspace_is_clean
  221. *
  222. * Check whether a region is clean. If there is any sort of
  223. * failure when consulting the server, we return not clean.
  224. *
  225. * Returns: 1 if clean, 0 otherwise
  226. */
  227. static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
  228. {
  229. int r;
  230. uint64_t region64 = (uint64_t)region;
  231. int64_t is_clean;
  232. size_t rdata_size;
  233. struct log_c *lc = log->context;
  234. rdata_size = sizeof(is_clean);
  235. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
  236. (char *)&region64, sizeof(region64),
  237. (char *)&is_clean, &rdata_size);
  238. return (r) ? 0 : (int)is_clean;
  239. }
  240. /*
  241. * userspace_in_sync
  242. *
  243. * Check if the region is in-sync. If there is any sort
  244. * of failure when consulting the server, we assume that
  245. * the region is not in sync.
  246. *
  247. * If 'can_block' is set, return immediately
  248. *
  249. * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
  250. */
  251. static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
  252. int can_block)
  253. {
  254. int r;
  255. uint64_t region64 = region;
  256. int64_t in_sync;
  257. size_t rdata_size;
  258. struct log_c *lc = log->context;
  259. /*
  260. * We can never respond directly - even if in_sync_hint is
  261. * set. This is because another machine could see a device
  262. * failure and mark the region out-of-sync. If we don't go
  263. * to userspace to ask, we might think the region is in-sync
  264. * and allow a read to pick up data that is stale. (This is
  265. * very unlikely if a device actually fails; but it is very
  266. * likely if a connection to one device from one machine fails.)
  267. *
  268. * There still might be a problem if the mirror caches the region
  269. * state as in-sync... but then this call would not be made. So,
  270. * that is a mirror problem.
  271. */
  272. if (!can_block)
  273. return -EWOULDBLOCK;
  274. rdata_size = sizeof(in_sync);
  275. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
  276. (char *)&region64, sizeof(region64),
  277. (char *)&in_sync, &rdata_size);
  278. return (r) ? 0 : (int)in_sync;
  279. }
  280. /*
  281. * userspace_flush
  282. *
  283. * This function is ok to block.
  284. * The flush happens in two stages. First, it sends all
  285. * clear/mark requests that are on the list. Then it
  286. * tells the server to commit them. This gives the
  287. * server a chance to optimise the commit, instead of
  288. * doing it for every request.
  289. *
  290. * Additionally, we could implement another thread that
  291. * sends the requests up to the server - reducing the
  292. * load on flush. Then the flush would have less in
  293. * the list and be responsible for the finishing commit.
  294. *
  295. * Returns: 0 on success, < 0 on failure
  296. */
  297. static int userspace_flush(struct dm_dirty_log *log)
  298. {
  299. int r = 0;
  300. unsigned long flags;
  301. struct log_c *lc = log->context;
  302. LIST_HEAD(flush_list);
  303. struct flush_entry *fe, *tmp_fe;
  304. spin_lock_irqsave(&lc->flush_lock, flags);
  305. list_splice_init(&lc->flush_list, &flush_list);
  306. spin_unlock_irqrestore(&lc->flush_lock, flags);
  307. if (list_empty(&flush_list))
  308. return 0;
  309. /*
  310. * FIXME: Count up requests, group request types,
  311. * allocate memory to stick all requests in and
  312. * send to server in one go. Failing the allocation,
  313. * do it one by one.
  314. */
  315. list_for_each_entry(fe, &flush_list, list) {
  316. r = userspace_do_request(lc, lc->uuid, fe->type,
  317. (char *)&fe->region,
  318. sizeof(fe->region),
  319. NULL, NULL);
  320. if (r)
  321. goto fail;
  322. }
  323. r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
  324. NULL, 0, NULL, NULL);
  325. fail:
  326. /*
  327. * We can safely remove these entries, even if failure.
  328. * Calling code will receive an error and will know that
  329. * the log facility has failed.
  330. */
  331. list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
  332. list_del(&fe->list);
  333. mempool_free(fe, flush_entry_pool);
  334. }
  335. if (r)
  336. dm_table_event(lc->ti->table);
  337. return r;
  338. }
  339. /*
  340. * userspace_mark_region
  341. *
  342. * This function should avoid blocking unless absolutely required.
  343. * (Memory allocation is valid for blocking.)
  344. */
  345. static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
  346. {
  347. unsigned long flags;
  348. struct log_c *lc = log->context;
  349. struct flush_entry *fe;
  350. /* Wait for an allocation, but _never_ fail */
  351. fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
  352. BUG_ON(!fe);
  353. spin_lock_irqsave(&lc->flush_lock, flags);
  354. fe->type = DM_ULOG_MARK_REGION;
  355. fe->region = region;
  356. list_add(&fe->list, &lc->flush_list);
  357. spin_unlock_irqrestore(&lc->flush_lock, flags);
  358. return;
  359. }
  360. /*
  361. * userspace_clear_region
  362. *
  363. * This function must not block.
  364. * So, the alloc can't block. In the worst case, it is ok to
  365. * fail. It would simply mean we can't clear the region.
  366. * Does nothing to current sync context, but does mean
  367. * the region will be re-sync'ed on a reload of the mirror
  368. * even though it is in-sync.
  369. */
  370. static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
  371. {
  372. unsigned long flags;
  373. struct log_c *lc = log->context;
  374. struct flush_entry *fe;
  375. /*
  376. * If we fail to allocate, we skip the clearing of
  377. * the region. This doesn't hurt us in any way, except
  378. * to cause the region to be resync'ed when the
  379. * device is activated next time.
  380. */
  381. fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
  382. if (!fe) {
  383. DMERR("Failed to allocate memory to clear region.");
  384. return;
  385. }
  386. spin_lock_irqsave(&lc->flush_lock, flags);
  387. fe->type = DM_ULOG_CLEAR_REGION;
  388. fe->region = region;
  389. list_add(&fe->list, &lc->flush_list);
  390. spin_unlock_irqrestore(&lc->flush_lock, flags);
  391. return;
  392. }
  393. /*
  394. * userspace_get_resync_work
  395. *
  396. * Get a region that needs recovery. It is valid to return
  397. * an error for this function.
  398. *
  399. * Returns: 1 if region filled, 0 if no work, <0 on error
  400. */
  401. static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
  402. {
  403. int r;
  404. size_t rdata_size;
  405. struct log_c *lc = log->context;
  406. struct {
  407. int64_t i; /* 64-bit for mix arch compatibility */
  408. region_t r;
  409. } pkg;
  410. if (lc->in_sync_hint >= lc->region_count)
  411. return 0;
  412. rdata_size = sizeof(pkg);
  413. r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
  414. NULL, 0,
  415. (char *)&pkg, &rdata_size);
  416. *region = pkg.r;
  417. return (r) ? r : (int)pkg.i;
  418. }
  419. /*
  420. * userspace_set_region_sync
  421. *
  422. * Set the sync status of a given region. This function
  423. * must not fail.
  424. */
  425. static void userspace_set_region_sync(struct dm_dirty_log *log,
  426. region_t region, int in_sync)
  427. {
  428. int r;
  429. struct log_c *lc = log->context;
  430. struct {
  431. region_t r;
  432. int64_t i;
  433. } pkg;
  434. pkg.r = region;
  435. pkg.i = (int64_t)in_sync;
  436. r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
  437. (char *)&pkg, sizeof(pkg),
  438. NULL, NULL);
  439. /*
  440. * It would be nice to be able to report failures.
  441. * However, it is easy emough to detect and resolve.
  442. */
  443. return;
  444. }
  445. /*
  446. * userspace_get_sync_count
  447. *
  448. * If there is any sort of failure when consulting the server,
  449. * we assume that the sync count is zero.
  450. *
  451. * Returns: sync count on success, 0 on failure
  452. */
  453. static region_t userspace_get_sync_count(struct dm_dirty_log *log)
  454. {
  455. int r;
  456. size_t rdata_size;
  457. uint64_t sync_count;
  458. struct log_c *lc = log->context;
  459. rdata_size = sizeof(sync_count);
  460. r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
  461. NULL, 0,
  462. (char *)&sync_count, &rdata_size);
  463. if (r)
  464. return 0;
  465. if (sync_count >= lc->region_count)
  466. lc->in_sync_hint = lc->region_count;
  467. return (region_t)sync_count;
  468. }
  469. /*
  470. * userspace_status
  471. *
  472. * Returns: amount of space consumed
  473. */
  474. static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
  475. char *result, unsigned maxlen)
  476. {
  477. int r = 0;
  478. char *table_args;
  479. size_t sz = (size_t)maxlen;
  480. struct log_c *lc = log->context;
  481. switch (status_type) {
  482. case STATUSTYPE_INFO:
  483. r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
  484. NULL, 0,
  485. result, &sz);
  486. if (r) {
  487. sz = 0;
  488. DMEMIT("%s 1 COM_FAILURE", log->type->name);
  489. }
  490. break;
  491. case STATUSTYPE_TABLE:
  492. sz = 0;
  493. table_args = strstr(lc->usr_argv_str, " ");
  494. BUG_ON(!table_args); /* There will always be a ' ' */
  495. table_args++;
  496. DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
  497. lc->uuid, table_args);
  498. break;
  499. }
  500. return (r) ? 0 : (int)sz;
  501. }
  502. /*
  503. * userspace_is_remote_recovering
  504. *
  505. * Returns: 1 if region recovering, 0 otherwise
  506. */
  507. static int userspace_is_remote_recovering(struct dm_dirty_log *log,
  508. region_t region)
  509. {
  510. int r;
  511. uint64_t region64 = region;
  512. struct log_c *lc = log->context;
  513. static unsigned long long limit;
  514. struct {
  515. int64_t is_recovering;
  516. uint64_t in_sync_hint;
  517. } pkg;
  518. size_t rdata_size = sizeof(pkg);
  519. /*
  520. * Once the mirror has been reported to be in-sync,
  521. * it will never again ask for recovery work. So,
  522. * we can safely say there is not a remote machine
  523. * recovering if the device is in-sync. (in_sync_hint
  524. * must be reset at resume time.)
  525. */
  526. if (region < lc->in_sync_hint)
  527. return 0;
  528. else if (jiffies < limit)
  529. return 1;
  530. limit = jiffies + (HZ / 4);
  531. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
  532. (char *)&region64, sizeof(region64),
  533. (char *)&pkg, &rdata_size);
  534. if (r)
  535. return 1;
  536. lc->in_sync_hint = pkg.in_sync_hint;
  537. return (int)pkg.is_recovering;
  538. }
  539. static struct dm_dirty_log_type _userspace_type = {
  540. .name = "userspace",
  541. .module = THIS_MODULE,
  542. .ctr = userspace_ctr,
  543. .dtr = userspace_dtr,
  544. .presuspend = userspace_presuspend,
  545. .postsuspend = userspace_postsuspend,
  546. .resume = userspace_resume,
  547. .get_region_size = userspace_get_region_size,
  548. .is_clean = userspace_is_clean,
  549. .in_sync = userspace_in_sync,
  550. .flush = userspace_flush,
  551. .mark_region = userspace_mark_region,
  552. .clear_region = userspace_clear_region,
  553. .get_resync_work = userspace_get_resync_work,
  554. .set_region_sync = userspace_set_region_sync,
  555. .get_sync_count = userspace_get_sync_count,
  556. .status = userspace_status,
  557. .is_remote_recovering = userspace_is_remote_recovering,
  558. };
  559. static int __init userspace_dirty_log_init(void)
  560. {
  561. int r = 0;
  562. flush_entry_pool = mempool_create(100, flush_entry_alloc,
  563. flush_entry_free, NULL);
  564. if (!flush_entry_pool) {
  565. DMWARN("Unable to create flush_entry_pool: No memory.");
  566. return -ENOMEM;
  567. }
  568. r = dm_ulog_tfr_init();
  569. if (r) {
  570. DMWARN("Unable to initialize userspace log communications");
  571. mempool_destroy(flush_entry_pool);
  572. return r;
  573. }
  574. r = dm_dirty_log_type_register(&_userspace_type);
  575. if (r) {
  576. DMWARN("Couldn't register userspace dirty log type");
  577. dm_ulog_tfr_exit();
  578. mempool_destroy(flush_entry_pool);
  579. return r;
  580. }
  581. DMINFO("version 1.0.0 loaded");
  582. return 0;
  583. }
  584. static void __exit userspace_dirty_log_exit(void)
  585. {
  586. dm_dirty_log_type_unregister(&_userspace_type);
  587. dm_ulog_tfr_exit();
  588. mempool_destroy(flush_entry_pool);
  589. DMINFO("version 1.0.0 unloaded");
  590. return;
  591. }
  592. module_init(userspace_dirty_log_init);
  593. module_exit(userspace_dirty_log_exit);
  594. MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
  595. MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
  596. MODULE_LICENSE("GPL");