/*
 * dm-log-userspace-base.c
 *
 * Copyright (C) 2006-2009 Red Hat, Inc.
 *
 * This file is released under the LGPL.
 */
#include <linux/bio.h>
#include <linux/jiffies.h>
#include <linux/dm-dirty-log.h>
#include <linux/device-mapper.h>
#include <linux/dm-log-userspace.h>

#include "dm-log-userspace-transfer.h"
  11. struct flush_entry {
  12. int type;
  13. region_t region;
  14. struct list_head list;
  15. };
  16. struct log_c {
  17. struct dm_target *ti;
  18. uint32_t region_size;
  19. region_t region_count;
  20. char uuid[DM_UUID_LEN];
  21. char *usr_argv_str;
  22. uint32_t usr_argc;
  23. /*
  24. * in_sync_hint gets set when doing is_remote_recovering. It
  25. * represents the first region that needs recovery. IOW, the
  26. * first zero bit of sync_bits. This can be useful for to limit
  27. * traffic for calls like is_remote_recovering and get_resync_work,
  28. * but be take care in its use for anything else.
  29. */
  30. uint64_t in_sync_hint;
  31. spinlock_t flush_lock;
  32. struct list_head flush_list; /* only for clear and mark requests */
  33. };
  34. static mempool_t *flush_entry_pool;
  35. static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
  36. {
  37. return kmalloc(sizeof(struct flush_entry), gfp_mask);
  38. }
  39. static void flush_entry_free(void *element, void *pool_data)
  40. {
  41. kfree(element);
  42. }
  43. static int userspace_do_request(struct log_c *lc, const char *uuid,
  44. int request_type, char *data, size_t data_size,
  45. char *rdata, size_t *rdata_size)
  46. {
  47. int r;
  48. /*
  49. * If the server isn't there, -ESRCH is returned,
  50. * and we must keep trying until the server is
  51. * restored.
  52. */
  53. retry:
  54. r = dm_consult_userspace(uuid, request_type, data,
  55. data_size, rdata, rdata_size);
  56. if (r != -ESRCH)
  57. return r;
  58. DMERR(" Userspace log server not found.");
  59. while (1) {
  60. set_current_state(TASK_INTERRUPTIBLE);
  61. schedule_timeout(2*HZ);
  62. DMWARN("Attempting to contact userspace log server...");
  63. r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str,
  64. strlen(lc->usr_argv_str) + 1,
  65. NULL, NULL);
  66. if (!r)
  67. break;
  68. }
  69. DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
  70. r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
  71. 0, NULL, NULL);
  72. if (!r)
  73. goto retry;
  74. DMERR("Error trying to resume userspace log: %d", r);
  75. return -ESRCH;
  76. }
  77. static int build_constructor_string(struct dm_target *ti,
  78. unsigned argc, char **argv,
  79. char **ctr_str)
  80. {
  81. int i, str_size;
  82. char *str = NULL;
  83. *ctr_str = NULL;
  84. for (i = 0, str_size = 0; i < argc; i++)
  85. str_size += strlen(argv[i]) + 1; /* +1 for space between args */
  86. str_size += 20; /* Max number of chars in a printed u64 number */
  87. str = kzalloc(str_size, GFP_KERNEL);
  88. if (!str) {
  89. DMWARN("Unable to allocate memory for constructor string");
  90. return -ENOMEM;
  91. }
  92. str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
  93. for (i = 0; i < argc; i++)
  94. str_size += sprintf(str + str_size, " %s", argv[i]);
  95. *ctr_str = str;
  96. return str_size;
  97. }
  98. /*
  99. * userspace_ctr
  100. *
  101. * argv contains:
  102. * <UUID> <other args>
  103. * Where 'other args' is the userspace implementation specific log
  104. * arguments. An example might be:
  105. * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
  106. *
  107. * So, this module will strip off the <UUID> for identification purposes
  108. * when communicating with userspace about a log; but will pass on everything
  109. * else.
  110. */
  111. static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
  112. unsigned argc, char **argv)
  113. {
  114. int r = 0;
  115. int str_size;
  116. char *ctr_str = NULL;
  117. struct log_c *lc = NULL;
  118. uint64_t rdata;
  119. size_t rdata_size = sizeof(rdata);
  120. if (argc < 3) {
  121. DMWARN("Too few arguments to userspace dirty log");
  122. return -EINVAL;
  123. }
  124. lc = kmalloc(sizeof(*lc), GFP_KERNEL);
  125. if (!lc) {
  126. DMWARN("Unable to allocate userspace log context.");
  127. return -ENOMEM;
  128. }
  129. lc->ti = ti;
  130. if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
  131. DMWARN("UUID argument too long.");
  132. kfree(lc);
  133. return -EINVAL;
  134. }
  135. strncpy(lc->uuid, argv[0], DM_UUID_LEN);
  136. spin_lock_init(&lc->flush_lock);
  137. INIT_LIST_HEAD(&lc->flush_list);
  138. str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
  139. if (str_size < 0) {
  140. kfree(lc);
  141. return str_size;
  142. }
  143. /* Send table string */
  144. r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
  145. ctr_str, str_size, NULL, NULL);
  146. if (r == -ESRCH) {
  147. DMERR("Userspace log server not found");
  148. goto out;
  149. }
  150. /* Since the region size does not change, get it now */
  151. rdata_size = sizeof(rdata);
  152. r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
  153. NULL, 0, (char *)&rdata, &rdata_size);
  154. if (r) {
  155. DMERR("Failed to get region size of dirty log");
  156. goto out;
  157. }
  158. lc->region_size = (uint32_t)rdata;
  159. lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
  160. out:
  161. if (r) {
  162. kfree(lc);
  163. kfree(ctr_str);
  164. } else {
  165. lc->usr_argv_str = ctr_str;
  166. lc->usr_argc = argc;
  167. log->context = lc;
  168. }
  169. return r;
  170. }
  171. static void userspace_dtr(struct dm_dirty_log *log)
  172. {
  173. int r;
  174. struct log_c *lc = log->context;
  175. r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR,
  176. NULL, 0,
  177. NULL, NULL);
  178. kfree(lc->usr_argv_str);
  179. kfree(lc);
  180. return;
  181. }
  182. static int userspace_presuspend(struct dm_dirty_log *log)
  183. {
  184. int r;
  185. struct log_c *lc = log->context;
  186. r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
  187. NULL, 0,
  188. NULL, NULL);
  189. return r;
  190. }
  191. static int userspace_postsuspend(struct dm_dirty_log *log)
  192. {
  193. int r;
  194. struct log_c *lc = log->context;
  195. r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
  196. NULL, 0,
  197. NULL, NULL);
  198. return r;
  199. }
  200. static int userspace_resume(struct dm_dirty_log *log)
  201. {
  202. int r;
  203. struct log_c *lc = log->context;
  204. lc->in_sync_hint = 0;
  205. r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
  206. NULL, 0,
  207. NULL, NULL);
  208. return r;
  209. }
  210. static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
  211. {
  212. struct log_c *lc = log->context;
  213. return lc->region_size;
  214. }
  215. /*
  216. * userspace_is_clean
  217. *
  218. * Check whether a region is clean. If there is any sort of
  219. * failure when consulting the server, we return not clean.
  220. *
  221. * Returns: 1 if clean, 0 otherwise
  222. */
  223. static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
  224. {
  225. int r;
  226. uint64_t region64 = (uint64_t)region;
  227. int64_t is_clean;
  228. size_t rdata_size;
  229. struct log_c *lc = log->context;
  230. rdata_size = sizeof(is_clean);
  231. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
  232. (char *)&region64, sizeof(region64),
  233. (char *)&is_clean, &rdata_size);
  234. return (r) ? 0 : (int)is_clean;
  235. }
  236. /*
  237. * userspace_in_sync
  238. *
  239. * Check if the region is in-sync. If there is any sort
  240. * of failure when consulting the server, we assume that
  241. * the region is not in sync.
  242. *
  243. * If 'can_block' is set, return immediately
  244. *
  245. * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
  246. */
  247. static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
  248. int can_block)
  249. {
  250. int r;
  251. uint64_t region64 = region;
  252. int64_t in_sync;
  253. size_t rdata_size;
  254. struct log_c *lc = log->context;
  255. /*
  256. * We can never respond directly - even if in_sync_hint is
  257. * set. This is because another machine could see a device
  258. * failure and mark the region out-of-sync. If we don't go
  259. * to userspace to ask, we might think the region is in-sync
  260. * and allow a read to pick up data that is stale. (This is
  261. * very unlikely if a device actually fails; but it is very
  262. * likely if a connection to one device from one machine fails.)
  263. *
  264. * There still might be a problem if the mirror caches the region
  265. * state as in-sync... but then this call would not be made. So,
  266. * that is a mirror problem.
  267. */
  268. if (!can_block)
  269. return -EWOULDBLOCK;
  270. rdata_size = sizeof(in_sync);
  271. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
  272. (char *)&region64, sizeof(region64),
  273. (char *)&in_sync, &rdata_size);
  274. return (r) ? 0 : (int)in_sync;
  275. }
  276. /*
  277. * userspace_flush
  278. *
  279. * This function is ok to block.
  280. * The flush happens in two stages. First, it sends all
  281. * clear/mark requests that are on the list. Then it
  282. * tells the server to commit them. This gives the
  283. * server a chance to optimise the commit, instead of
  284. * doing it for every request.
  285. *
  286. * Additionally, we could implement another thread that
  287. * sends the requests up to the server - reducing the
  288. * load on flush. Then the flush would have less in
  289. * the list and be responsible for the finishing commit.
  290. *
  291. * Returns: 0 on success, < 0 on failure
  292. */
  293. static int userspace_flush(struct dm_dirty_log *log)
  294. {
  295. int r = 0;
  296. unsigned long flags;
  297. struct log_c *lc = log->context;
  298. LIST_HEAD(flush_list);
  299. struct flush_entry *fe, *tmp_fe;
  300. spin_lock_irqsave(&lc->flush_lock, flags);
  301. list_splice_init(&lc->flush_list, &flush_list);
  302. spin_unlock_irqrestore(&lc->flush_lock, flags);
  303. if (list_empty(&flush_list))
  304. return 0;
  305. /*
  306. * FIXME: Count up requests, group request types,
  307. * allocate memory to stick all requests in and
  308. * send to server in one go. Failing the allocation,
  309. * do it one by one.
  310. */
  311. list_for_each_entry(fe, &flush_list, list) {
  312. r = userspace_do_request(lc, lc->uuid, fe->type,
  313. (char *)&fe->region,
  314. sizeof(fe->region),
  315. NULL, NULL);
  316. if (r)
  317. goto fail;
  318. }
  319. r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
  320. NULL, 0, NULL, NULL);
  321. fail:
  322. /*
  323. * We can safely remove these entries, even if failure.
  324. * Calling code will receive an error and will know that
  325. * the log facility has failed.
  326. */
  327. list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
  328. list_del(&fe->list);
  329. mempool_free(fe, flush_entry_pool);
  330. }
  331. if (r)
  332. dm_table_event(lc->ti->table);
  333. return r;
  334. }
  335. /*
  336. * userspace_mark_region
  337. *
  338. * This function should avoid blocking unless absolutely required.
  339. * (Memory allocation is valid for blocking.)
  340. */
  341. static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
  342. {
  343. unsigned long flags;
  344. struct log_c *lc = log->context;
  345. struct flush_entry *fe;
  346. /* Wait for an allocation, but _never_ fail */
  347. fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
  348. BUG_ON(!fe);
  349. spin_lock_irqsave(&lc->flush_lock, flags);
  350. fe->type = DM_ULOG_MARK_REGION;
  351. fe->region = region;
  352. list_add(&fe->list, &lc->flush_list);
  353. spin_unlock_irqrestore(&lc->flush_lock, flags);
  354. return;
  355. }
  356. /*
  357. * userspace_clear_region
  358. *
  359. * This function must not block.
  360. * So, the alloc can't block. In the worst case, it is ok to
  361. * fail. It would simply mean we can't clear the region.
  362. * Does nothing to current sync context, but does mean
  363. * the region will be re-sync'ed on a reload of the mirror
  364. * even though it is in-sync.
  365. */
  366. static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
  367. {
  368. unsigned long flags;
  369. struct log_c *lc = log->context;
  370. struct flush_entry *fe;
  371. /*
  372. * If we fail to allocate, we skip the clearing of
  373. * the region. This doesn't hurt us in any way, except
  374. * to cause the region to be resync'ed when the
  375. * device is activated next time.
  376. */
  377. fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
  378. if (!fe) {
  379. DMERR("Failed to allocate memory to clear region.");
  380. return;
  381. }
  382. spin_lock_irqsave(&lc->flush_lock, flags);
  383. fe->type = DM_ULOG_CLEAR_REGION;
  384. fe->region = region;
  385. list_add(&fe->list, &lc->flush_list);
  386. spin_unlock_irqrestore(&lc->flush_lock, flags);
  387. return;
  388. }
  389. /*
  390. * userspace_get_resync_work
  391. *
  392. * Get a region that needs recovery. It is valid to return
  393. * an error for this function.
  394. *
  395. * Returns: 1 if region filled, 0 if no work, <0 on error
  396. */
  397. static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
  398. {
  399. int r;
  400. size_t rdata_size;
  401. struct log_c *lc = log->context;
  402. struct {
  403. int64_t i; /* 64-bit for mix arch compatibility */
  404. region_t r;
  405. } pkg;
  406. if (lc->in_sync_hint >= lc->region_count)
  407. return 0;
  408. rdata_size = sizeof(pkg);
  409. r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
  410. NULL, 0,
  411. (char *)&pkg, &rdata_size);
  412. *region = pkg.r;
  413. return (r) ? r : (int)pkg.i;
  414. }
  415. /*
  416. * userspace_set_region_sync
  417. *
  418. * Set the sync status of a given region. This function
  419. * must not fail.
  420. */
  421. static void userspace_set_region_sync(struct dm_dirty_log *log,
  422. region_t region, int in_sync)
  423. {
  424. int r;
  425. struct log_c *lc = log->context;
  426. struct {
  427. region_t r;
  428. int64_t i;
  429. } pkg;
  430. pkg.r = region;
  431. pkg.i = (int64_t)in_sync;
  432. r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
  433. (char *)&pkg, sizeof(pkg),
  434. NULL, NULL);
  435. /*
  436. * It would be nice to be able to report failures.
  437. * However, it is easy emough to detect and resolve.
  438. */
  439. return;
  440. }
  441. /*
  442. * userspace_get_sync_count
  443. *
  444. * If there is any sort of failure when consulting the server,
  445. * we assume that the sync count is zero.
  446. *
  447. * Returns: sync count on success, 0 on failure
  448. */
  449. static region_t userspace_get_sync_count(struct dm_dirty_log *log)
  450. {
  451. int r;
  452. size_t rdata_size;
  453. uint64_t sync_count;
  454. struct log_c *lc = log->context;
  455. rdata_size = sizeof(sync_count);
  456. r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
  457. NULL, 0,
  458. (char *)&sync_count, &rdata_size);
  459. if (r)
  460. return 0;
  461. if (sync_count >= lc->region_count)
  462. lc->in_sync_hint = lc->region_count;
  463. return (region_t)sync_count;
  464. }
  465. /*
  466. * userspace_status
  467. *
  468. * Returns: amount of space consumed
  469. */
  470. static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
  471. char *result, unsigned maxlen)
  472. {
  473. int r = 0;
  474. char *table_args;
  475. size_t sz = (size_t)maxlen;
  476. struct log_c *lc = log->context;
  477. switch (status_type) {
  478. case STATUSTYPE_INFO:
  479. r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
  480. NULL, 0,
  481. result, &sz);
  482. if (r) {
  483. sz = 0;
  484. DMEMIT("%s 1 COM_FAILURE", log->type->name);
  485. }
  486. break;
  487. case STATUSTYPE_TABLE:
  488. sz = 0;
  489. table_args = strstr(lc->usr_argv_str, " ");
  490. BUG_ON(!table_args); /* There will always be a ' ' */
  491. table_args++;
  492. DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
  493. lc->uuid, table_args);
  494. break;
  495. }
  496. return (r) ? 0 : (int)sz;
  497. }
  498. /*
  499. * userspace_is_remote_recovering
  500. *
  501. * Returns: 1 if region recovering, 0 otherwise
  502. */
  503. static int userspace_is_remote_recovering(struct dm_dirty_log *log,
  504. region_t region)
  505. {
  506. int r;
  507. uint64_t region64 = region;
  508. struct log_c *lc = log->context;
  509. static unsigned long long limit;
  510. struct {
  511. int64_t is_recovering;
  512. uint64_t in_sync_hint;
  513. } pkg;
  514. size_t rdata_size = sizeof(pkg);
  515. /*
  516. * Once the mirror has been reported to be in-sync,
  517. * it will never again ask for recovery work. So,
  518. * we can safely say there is not a remote machine
  519. * recovering if the device is in-sync. (in_sync_hint
  520. * must be reset at resume time.)
  521. */
  522. if (region < lc->in_sync_hint)
  523. return 0;
  524. else if (jiffies < limit)
  525. return 1;
  526. limit = jiffies + (HZ / 4);
  527. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
  528. (char *)&region64, sizeof(region64),
  529. (char *)&pkg, &rdata_size);
  530. if (r)
  531. return 1;
  532. lc->in_sync_hint = pkg.in_sync_hint;
  533. return (int)pkg.is_recovering;
  534. }
  535. static struct dm_dirty_log_type _userspace_type = {
  536. .name = "userspace",
  537. .module = THIS_MODULE,
  538. .ctr = userspace_ctr,
  539. .dtr = userspace_dtr,
  540. .presuspend = userspace_presuspend,
  541. .postsuspend = userspace_postsuspend,
  542. .resume = userspace_resume,
  543. .get_region_size = userspace_get_region_size,
  544. .is_clean = userspace_is_clean,
  545. .in_sync = userspace_in_sync,
  546. .flush = userspace_flush,
  547. .mark_region = userspace_mark_region,
  548. .clear_region = userspace_clear_region,
  549. .get_resync_work = userspace_get_resync_work,
  550. .set_region_sync = userspace_set_region_sync,
  551. .get_sync_count = userspace_get_sync_count,
  552. .status = userspace_status,
  553. .is_remote_recovering = userspace_is_remote_recovering,
  554. };
  555. static int __init userspace_dirty_log_init(void)
  556. {
  557. int r = 0;
  558. flush_entry_pool = mempool_create(100, flush_entry_alloc,
  559. flush_entry_free, NULL);
  560. if (!flush_entry_pool) {
  561. DMWARN("Unable to create flush_entry_pool: No memory.");
  562. return -ENOMEM;
  563. }
  564. r = dm_ulog_tfr_init();
  565. if (r) {
  566. DMWARN("Unable to initialize userspace log communications");
  567. mempool_destroy(flush_entry_pool);
  568. return r;
  569. }
  570. r = dm_dirty_log_type_register(&_userspace_type);
  571. if (r) {
  572. DMWARN("Couldn't register userspace dirty log type");
  573. dm_ulog_tfr_exit();
  574. mempool_destroy(flush_entry_pool);
  575. return r;
  576. }
  577. DMINFO("version 1.0.0 loaded");
  578. return 0;
  579. }
  580. static void __exit userspace_dirty_log_exit(void)
  581. {
  582. dm_dirty_log_type_unregister(&_userspace_type);
  583. dm_ulog_tfr_exit();
  584. mempool_destroy(flush_entry_pool);
  585. DMINFO("version 1.0.0 unloaded");
  586. return;
  587. }
  588. module_init(userspace_dirty_log_init);
  589. module_exit(userspace_dirty_log_exit);
  590. MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
  591. MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
  592. MODULE_LICENSE("GPL");