cleancache.c

/*
 * Cleancache frontend
 *
 * This code provides the generic "frontend" layer to call a matching
 * "backend" driver implementation of cleancache.  See
 * Documentation/vm/cleancache.txt for more information.
 *
 * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
 * Author: Dan Magenheimer
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/exportfs.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
#include <linux/cleancache.h>

/*
 * This global enablement flag may be read thousands of times per second
 * by cleancache_get/put/invalidate even on systems where cleancache_ops
 * is not claimed (e.g. cleancache is configured on but remains
 * disabled), so it is preferred to the slower alternative: a function
 * call that checks a non-global.
 */
int cleancache_enabled __read_mostly;
EXPORT_SYMBOL(cleancache_enabled);

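/*
 * For illustration (a sketch, not part of this file): the inline wrappers
 * in include/linux/cleancache.h are expected to test cleancache_enabled
 * (and the per-fs poolid) before paying for a real function call, along
 * these lines:
 *
 *	static inline int cleancache_get_page(struct page *page)
 *	{
 *		int ret = -1;
 *
 *		if (cleancache_enabled && cleancache_fs_enabled(page))
 *			ret = __cleancache_get_page(page);
 *		return ret;
 *	}
 */
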
/*
 * cleancache_ops is set by cleancache_register_ops to contain the pointers
 * to the cleancache "backend" implementation functions.
 */
static struct cleancache_ops *cleancache_ops __read_mostly;

/*
 * Counters available via /sys/kernel/debug/cleancache (if debugfs is
 * properly configured).  These are for information only so are not protected
 * against increment races.
 */
static u64 cleancache_succ_gets;
static u64 cleancache_failed_gets;
static u64 cleancache_puts;
static u64 cleancache_invalidates;

/*
 * When no backend is registered all calls to init_fs and init_shared_fs
 * are recorded and fake poolids (FAKE_FS_POOLID_OFFSET or
 * FAKE_SHARED_FS_POOLID_OFFSET, plus the offset in the respective array
 * [shared_|]fs_poolid_map) are given to the respective super block
 * (sb->cleancache_poolid) and no tmem_pools are created. When a backend
 * registers with cleancache the previous calls to init_fs and init_shared_fs
 * are executed to create tmem_pools and set the respective poolids. While no
 * backend is registered all "puts", "gets" and "flushes" are ignored or failed.
 */
#define MAX_INITIALIZABLE_FS 32
#define FAKE_FS_POOLID_OFFSET 1000
#define FAKE_SHARED_FS_POOLID_OFFSET 2000
#define FS_NO_BACKEND (-1)
#define FS_UNKNOWN (-2)
static int fs_poolid_map[MAX_INITIALIZABLE_FS];
static int shared_fs_poolid_map[MAX_INITIALIZABLE_FS];
static char *uuids[MAX_INITIALIZABLE_FS];

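/*
 * Worked example of the scheme above (illustrative only): the first
 * non-shared filesystem to mount occupies slot 0, so its super block
 * gets sb->cleancache_poolid = FAKE_FS_POOLID_OFFSET + 0 = 1000.  With
 * no backend loaded, fs_poolid_map[0] stays FS_NO_BACKEND and every
 * get/put is a no-op; once a backend registers, fs_poolid_map[0] holds
 * the real tmem poolid and get_poolid_from_fake() (below) returns it:
 *
 *	int real_id = get_poolid_from_fake(1000);
 *	// real_id == fs_poolid_map[1000 - FAKE_FS_POOLID_OFFSET]
 */
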
/*
 * Mutex for the [shared_|]fs_poolid_map to guard against multiple threads
 * invoking umount (and ending up in __cleancache_invalidate_fs) and also
 * multiple threads calling mount (and ending up in __cleancache_init_[shared|]fs).
 */
static DEFINE_MUTEX(poolid_mutex);

/*
 * While cleancache_ops is NULL (the default), all calls to the cleancache
 * functions, except __cleancache_invalidate_fs and
 * __cleancache_init_[shared|]fs, are guarded by an "if (!cleancache_ops)
 * return" check. This means multiple threads (from different filesystems)
 * will be checking cleancache_ops. Using a plain pointer instead of an
 * atomic_t, or a pointer guarded by a spinlock, is OK: a window between
 * when the backend has been initialized (and cleancache_ops set non-NULL)
 * and when the filesystems actually start calling the backend is harmless.
 * The inverse (when unloading) is obviously not good - but this shim does
 * not do that (yet).
 */

/*
 * The backends and filesystems all work asynchronously. This is b/c the
 * backends can be built as modules.
 * The usual sequence of events is:
 *	a) mount /	-> __cleancache_init_fs is called. We set the
 *		[shared_|]fs_poolid_map and uuids for the filesystem.
 *
 *	b) user does I/Os -> we call the rest of the __cleancache_*
 *		functions, which return immediately as cleancache_ops
 *		is NULL.
 *
 *	c) modprobe zcache -> cleancache_register_ops. We init the backend
 *		and set cleancache_ops to non-NULL, and for any fs_poolid_map
 *		entry (which is set by __cleancache_init_fs) we initialize
 *		the poolid.
 *
 *	d) user does I/Os -> now that cleancache_ops is set, all the
 *		__cleancache_* functions can call the backend. They all check
 *		that fs_poolid_map is valid and if so invoke the backend.
 *
 *	e) umount /	-> __cleancache_invalidate_fs, the fs_poolid_map is
 *		reset (which is the second check in the __cleancache_* ops
 *		before calling the backend).
 *
 * The sequence of events could also be c), followed by a), d) and e): the
 * deferred initialization in c) would then not happen. There is also the
 * chance of c), plus one thread doing a) + d) while another does e). For
 * that case we depend on the filesystem calling __cleancache_invalidate_fs
 * in the proper sequence, so that all I/Os are handled before it invalidates
 * the fs (which is the last part of the unmounting process).
 *
 * Note: The astute reader will notice that there is no "rmmod zcache" case.
 * This is b/c the functionality for that is not yet implemented and, when
 * done, will require some extra locking not yet devised.
 */

/*
 * Register operations for cleancache, returning the previous ops to allow
 * detection of multiple backends and possible nesting.
 */
struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops)
{
	struct cleancache_ops *old = cleancache_ops;
	int i;

	mutex_lock(&poolid_mutex);
	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
		if (fs_poolid_map[i] == FS_NO_BACKEND)
			fs_poolid_map[i] = ops->init_fs(PAGE_SIZE);
		if (shared_fs_poolid_map[i] == FS_NO_BACKEND)
			shared_fs_poolid_map[i] =
				ops->init_shared_fs(uuids[i], PAGE_SIZE);
	}
	/*
	 * We MUST set cleancache_ops _after_ we have called the backend's
	 * init_fs or init_shared_fs functions. Otherwise the compiler might
	 * re-order where cleancache_ops is set in this function.
	 */
	barrier();
	cleancache_ops = ops;
	mutex_unlock(&poolid_mutex);
	return old;
}
EXPORT_SYMBOL(cleancache_register_ops);

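/*
 * Sketch of the registration sequence from a backend's point of view
 * (illustrative only; the my_*() helpers are hypothetical, not part of
 * this file). A backend module fills in a cleancache_ops and hands it
 * to cleancache_register_ops() from its module init:
 *
 *	static struct cleancache_ops my_ops = {
 *		.init_fs = my_init_fs,
 *		.init_shared_fs = my_init_shared_fs,
 *		.get_page = my_get_page,
 *		.put_page = my_put_page,
 *		.invalidate_page = my_invalidate_page,
 *		.invalidate_inode = my_invalidate_inode,
 *		.invalidate_fs = my_invalidate_fs,
 *	};
 *
 *	old_ops = cleancache_register_ops(&my_ops);
 *	if (old_ops)
 *		pr_warn("another cleancache backend was already registered\n");
 */
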
/* Called by a cleancache-enabled filesystem at time of mount */
void __cleancache_init_fs(struct super_block *sb)
{
	int i;

	mutex_lock(&poolid_mutex);
	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
		if (fs_poolid_map[i] == FS_UNKNOWN) {
			sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET;
			if (cleancache_ops)
				fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE);
			else
				fs_poolid_map[i] = FS_NO_BACKEND;
			break;
		}
	}
	mutex_unlock(&poolid_mutex);
}
EXPORT_SYMBOL(__cleancache_init_fs);

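/*
 * Sketch of the filesystem side (illustrative only): per
 * Documentation/vm/cleancache.txt, a filesystem opts in by calling
 * cleancache_init_fs (the inline wrapper around this function) near the
 * end of its fill_super; myfs_fill_super() here is hypothetical:
 *
 *	static int myfs_fill_super(struct super_block *sb, void *data,
 *				   int silent)
 *	{
 *		...
 *		cleancache_init_fs(sb);
 *		return 0;
 *	}
 */
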
/* Called by a cleancache-enabled clustered filesystem at time of mount */
void __cleancache_init_shared_fs(char *uuid, struct super_block *sb)
{
	int i;

	mutex_lock(&poolid_mutex);
	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
		if (shared_fs_poolid_map[i] == FS_UNKNOWN) {
			sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET;
			uuids[i] = uuid;
			if (cleancache_ops)
				shared_fs_poolid_map[i] =
					cleancache_ops->init_shared_fs(uuid, PAGE_SIZE);
			else
				shared_fs_poolid_map[i] = FS_NO_BACKEND;
			break;
		}
	}
	mutex_unlock(&poolid_mutex);
}
EXPORT_SYMBOL(__cleancache_init_shared_fs);

/*
 * If the filesystem uses exportable filehandles, use the filehandle as
 * the key, else use the inode number.
 */
static int cleancache_get_key(struct inode *inode,
			      struct cleancache_filekey *key)
{
	int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *);
	int len = 0, maxlen = CLEANCACHE_KEY_MAX;
	struct super_block *sb = inode->i_sb;

	key->u.ino = inode->i_ino;
	if (sb->s_export_op != NULL) {
		fhfn = sb->s_export_op->encode_fh;
		if (fhfn) {
			len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL);
			if (len <= FILEID_ROOT || len == FILEID_INVALID)
				return -1;
			if (maxlen > CLEANCACHE_KEY_MAX)
				return -1;
		}
	}
	return 0;
}

/*
 * Returns the pool_id that is associated with a given fake poolid.
 */
static int get_poolid_from_fake(int fake_pool_id)
{
	if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET)
		return shared_fs_poolid_map[fake_pool_id -
			FAKE_SHARED_FS_POOLID_OFFSET];
	else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET)
		return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET];
	return FS_NO_BACKEND;
}

/*
 * "Get" data from cleancache associated with the poolid/inode/index
 * that were specified when the data was put to cleancache and, if
 * successful, use it to fill the specified page with data and return 0.
 * If the get fails, the page frame is unchanged and -1 is returned.
 * Page must be locked by caller.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
int __cleancache_get_page(struct page *page)
{
	int ret = -1;
	int pool_id;
	int fake_pool_id;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops) {
		cleancache_failed_gets++;
		goto out;
	}

	VM_BUG_ON(!PageLocked(page));
	fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (fake_pool_id < 0)
		goto out;
	pool_id = get_poolid_from_fake(fake_pool_id);

	if (cleancache_get_key(page->mapping->host, &key) < 0)
		goto out;

	if (pool_id >= 0)
		ret = cleancache_ops->get_page(pool_id,
				key, page->index, page);
	if (ret == 0)
		cleancache_succ_gets++;
	else
		cleancache_failed_gets++;
out:
	return ret;
}
EXPORT_SYMBOL(__cleancache_get_page);

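/*
 * Usage sketch (illustrative only, based on the hooks described in
 * Documentation/vm/cleancache.txt): a filesystem read path can try
 * cleancache before issuing disk I/O, via the cleancache_get_page()
 * wrapper, along these lines:
 *
 *	if (cleancache_get_page(page) == 0) {
 *		// page was filled from cleancache; no disk read needed
 *		SetPageUptodate(page);
 *		unlock_page(page);
 *	} else {
 *		// fall back to a normal block read
 *		submit_disk_read(page);	// hypothetical helper
 *	}
 */
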
/*
 * "Put" data from a page to cleancache and associate it with the
 * (previously-obtained per-filesystem) poolid and the page's inode
 * and page index. Page must be locked. Note that a put_page
 * always "succeeds", though a subsequent get_page may succeed or fail.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
void __cleancache_put_page(struct page *page)
{
	int pool_id;
	int fake_pool_id;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops) {
		cleancache_puts++;
		return;
	}

	VM_BUG_ON(!PageLocked(page));
	fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (fake_pool_id < 0)
		return;

	pool_id = get_poolid_from_fake(fake_pool_id);

	if (pool_id >= 0 &&
	    cleancache_get_key(page->mapping->host, &key) >= 0) {
		cleancache_ops->put_page(pool_id, key, page->index, page);
		cleancache_puts++;
	}
}
EXPORT_SYMBOL(__cleancache_put_page);

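/*
 * Usage sketch (illustrative only): the VM's page-cache eviction path is
 * the natural caller here - when a clean, up-to-date page is dropped it
 * can be offered to cleancache, otherwise any stale copy must be
 * invalidated so a later get cannot return old data:
 *
 *	if (PageUptodate(page) && PageMappedToDisk(page))
 *		cleancache_put_page(page);
 *	else
 *		cleancache_invalidate_page(mapping, page);
 */
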
/*
 * Invalidate any data from cleancache associated with the poolid and the
 * page's inode and page index so that a subsequent "get" will fail.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
void __cleancache_invalidate_page(struct address_space *mapping,
				  struct page *page)
{
	/* careful... page->mapping is NULL sometimes when this is called */
	int pool_id;
	int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops)
		return;

	if (fake_pool_id >= 0) {
		pool_id = get_poolid_from_fake(fake_pool_id);
		if (pool_id < 0)
			return;

		VM_BUG_ON(!PageLocked(page));
		if (cleancache_get_key(mapping->host, &key) >= 0) {
			cleancache_ops->invalidate_page(pool_id,
					key, page->index);
			cleancache_invalidates++;
		}
	}
}
EXPORT_SYMBOL(__cleancache_invalidate_page);

/*
 * Invalidate all data from cleancache associated with the poolid and the
 * mapping's inode so that all subsequent gets to this poolid/inode
 * will fail.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
void __cleancache_invalidate_inode(struct address_space *mapping)
{
	int pool_id;
	int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops)
		return;

	if (fake_pool_id < 0)
		return;

	pool_id = get_poolid_from_fake(fake_pool_id);
	if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
		cleancache_ops->invalidate_inode(pool_id, key);
}
EXPORT_SYMBOL(__cleancache_invalidate_inode);

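/*
 * Usage sketch (illustrative only): truncation is the typical caller -
 * before an inode's page cache is torn down, everything cleancache may
 * hold for that inode must be dropped; myfs_truncate_pagecache() here
 * is a hypothetical helper:
 *
 *	void myfs_truncate_pagecache(struct address_space *mapping)
 *	{
 *		cleancache_invalidate_inode(mapping);
 *		truncate_inode_pages(mapping, 0);
 *	}
 */
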
/*
 * Called by any cleancache-enabled filesystem at time of unmount;
 * note that pool_id is surrendered and may be returned by a subsequent
 * cleancache_init_fs or cleancache_init_shared_fs.
 */
void __cleancache_invalidate_fs(struct super_block *sb)
{
	int index;
	int fake_pool_id = sb->cleancache_poolid;
	int old_poolid = fake_pool_id;

	mutex_lock(&poolid_mutex);
	if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) {
		index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET;
		old_poolid = shared_fs_poolid_map[index];
		shared_fs_poolid_map[index] = FS_UNKNOWN;
		uuids[index] = NULL;
	} else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) {
		index = fake_pool_id - FAKE_FS_POOLID_OFFSET;
		old_poolid = fs_poolid_map[index];
		fs_poolid_map[index] = FS_UNKNOWN;
	}
	sb->cleancache_poolid = -1;
	if (cleancache_ops)
		cleancache_ops->invalidate_fs(old_poolid);
	mutex_unlock(&poolid_mutex);
}
EXPORT_SYMBOL(__cleancache_invalidate_fs);

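/*
 * Usage sketch (illustrative only): the unmount side of the opt-in shown
 * earlier - a filesystem surrenders its pool as the super block goes away,
 * via the cleancache_invalidate_fs() wrapper; myfs_put_super() here is
 * hypothetical:
 *
 *	static void myfs_put_super(struct super_block *sb)
 *	{
 *		cleancache_invalidate_fs(sb);
 *		...
 *	}
 */
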
static int __init init_cleancache(void)
{
	int i;

#ifdef CONFIG_DEBUG_FS
	struct dentry *root = debugfs_create_dir("cleancache", NULL);

	if (root == NULL)
		return -ENXIO;
	debugfs_create_u64("succ_gets", S_IRUGO, root, &cleancache_succ_gets);
	debugfs_create_u64("failed_gets", S_IRUGO,
			   root, &cleancache_failed_gets);
	debugfs_create_u64("puts", S_IRUGO, root, &cleancache_puts);
	debugfs_create_u64("invalidates", S_IRUGO,
			   root, &cleancache_invalidates);
#endif
	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
		fs_poolid_map[i] = FS_UNKNOWN;
		shared_fs_poolid_map[i] = FS_UNKNOWN;
	}
	cleancache_enabled = 1;
	return 0;
}
module_init(init_cleancache);