cleancache.c

/*
 * Cleancache frontend
 *
 * This code provides the generic "frontend" layer to call a matching
 * "backend" driver implementation of cleancache. See
 * Documentation/vm/cleancache.txt for more information.
 *
 * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
 * Author: Dan Magenheimer
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/exportfs.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
#include <linux/cleancache.h>
/*
 * cleancache_ops is set by cleancache_register_ops to contain the pointers
 * to the cleancache "backend" implementation functions.
 */
static struct cleancache_ops *cleancache_ops __read_mostly;

/*
 * Counters available via /sys/kernel/debug/cleancache (if debugfs is
 * properly configured). These are for information only so are not protected
 * against increment races.
 */
static u64 cleancache_succ_gets;
static u64 cleancache_failed_gets;
static u64 cleancache_puts;
static u64 cleancache_invalidates;
/*
 * When no backend is registered, all calls to init_fs and init_shared_fs
 * are recorded and fake poolids (FAKE_FS_POOLID_OFFSET or
 * FAKE_SHARED_FS_POOLID_OFFSET, plus the offset into the respective array
 * [shared_|]fs_poolid_map) are given to the respective super block
 * (sb->cleancache_poolid), and no tmem_pools are created. When a backend
 * registers with cleancache, the earlier calls to init_fs and init_shared_fs
 * are replayed to create tmem_pools and set the respective poolids. While no
 * backend is registered, all "puts", "gets" and "flushes" are ignored or fail.
 */
#define MAX_INITIALIZABLE_FS		32
#define FAKE_FS_POOLID_OFFSET		1000
#define FAKE_SHARED_FS_POOLID_OFFSET	2000

#define FS_NO_BACKEND	(-1)
#define FS_UNKNOWN	(-2)
static int fs_poolid_map[MAX_INITIALIZABLE_FS];
static int shared_fs_poolid_map[MAX_INITIALIZABLE_FS];
static char *uuids[MAX_INITIALIZABLE_FS];
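/*
 * Worked example (illustrative only, values hypothetical): a super block
 * with sb->cleancache_poolid == 1003 refers to a non-shared filesystem at
 * index 1003 - FAKE_FS_POOLID_OFFSET == 3, so its real (backend) poolid,
 * if a backend has registered, is fs_poolid_map[3]; a value of 2000 refers
 * to shared_fs_poolid_map[0]. get_poolid_from_fake() below performs
 * exactly this translation.
 */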
/*
 * Mutex for the [shared_|]fs_poolid_map to guard against multiple threads
 * invoking umount (and ending up in __cleancache_invalidate_fs) and also
 * multiple threads calling mount (and ending up in
 * __cleancache_init_[shared|]fs).
 */
static DEFINE_MUTEX(poolid_mutex);
/*
 * While cleancache_ops is NULL (the default), all calls to the cleancache
 * functions, except __cleancache_invalidate_fs and
 * __cleancache_init_[shared|]fs, are guarded by an "if (!cleancache_ops)
 * return". This means multiple threads (from different filesystems) will be
 * checking cleancache_ops. Using a plain pointer instead of an atomic_t, or
 * a pointer guarded by a spinlock, is OK - it does not matter exactly when,
 * in the window between the backend being initialized (and cleancache_ops
 * being set non-NULL) and the filesystems starting to call the backend, the
 * new value becomes visible. The inverse (when unloading) is obviously not
 * good - but this shim does not do that (yet).
 */
/*
 * The backends and filesystems all work asynchronously. This is b/c the
 * backends can be built as modules.
 * The usual sequence of events is:
 *	a) mount /	-> __cleancache_init_fs is called. We set the
 *		[shared_|]fs_poolid_map and uuids for the filesystem.
 *
 *	b) user does I/Os -> we call the rest of the __cleancache_* functions,
 *		which return immediately as cleancache_ops is still NULL.
 *
 *	c) modprobe zcache -> cleancache_register_ops. We init the backend
 *		and set cleancache_ops to the backend's ops, and for any
 *		fs_poolid_map entry (set by __cleancache_init_fs) we
 *		initialize the poolid.
 *
 *	d) user does I/Os -> now that cleancache_ops is set, all the
 *		__cleancache_* functions can call the backend. They all check
 *		that fs_poolid_map is valid and if so invoke the backend.
 *
 *	e) umount /	-> __cleancache_invalidate_fs, the fs_poolid_map is
 *		reset (which is the second check in the __cleancache_* ops
 *		before calling the backend).
 *
 * The sequence of events could also be c), followed by a), d), and e). In
 * that case the replay done in c) simply has nothing to do. There is also
 * the chance of c), and one thread doing a) + d), and another doing e). For
 * that case we depend on the filesystem calling __cleancache_invalidate_fs
 * in the proper sequence (so that it handles all I/Os before it invalidates
 * the fs, which is the last part of the unmounting process).
 *
 * Note: The astute reader will notice that there is no "rmmod zcache" case.
 * This is b/c the functionality for that is not yet implemented and, when
 * done, will require some extra locking not yet devised.
 */
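/*
 * Illustrative sketch (not part of this file): a minimal backend would fill
 * in a struct cleancache_ops and hand it to cleancache_register_ops() from
 * its module init, i.e. step c) above. The "example_*" names are
 * hypothetical; only the callback slots and cleancache_register_ops()
 * come from this frontend.
 *
 *	static struct cleancache_ops example_cleancache_ops = {
 *		.init_fs		= example_init_fs,
 *		.init_shared_fs		= example_init_shared_fs,
 *		.get_page		= example_get_page,
 *		.put_page		= example_put_page,
 *		.invalidate_page	= example_invalidate_page,
 *		.invalidate_inode	= example_invalidate_inode,
 *		.invalidate_fs		= example_invalidate_fs,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		struct cleancache_ops *old;
 *
 *		old = cleancache_register_ops(&example_cleancache_ops);
 *		if (old)
 *			pr_warn("example: replacing other cleancache backend\n");
 *		return 0;
 *	}
 *	module_init(example_init);
 */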
/*
 * Register operations for cleancache, returning previous thus allowing
 * detection of multiple backends and possible nesting.
 */
struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops)
{
	struct cleancache_ops *old = cleancache_ops;
	int i;

	mutex_lock(&poolid_mutex);
	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
		if (fs_poolid_map[i] == FS_NO_BACKEND)
			fs_poolid_map[i] = ops->init_fs(PAGE_SIZE);
		if (shared_fs_poolid_map[i] == FS_NO_BACKEND)
			shared_fs_poolid_map[i] = ops->init_shared_fs
					(uuids[i], PAGE_SIZE);
	}
	/*
	 * We MUST set cleancache_ops _after_ we have called the backends
	 * init_fs or init_shared_fs functions. Otherwise the compiler might
	 * re-order where cleancache_ops is set in this function.
	 */
	barrier();
	cleancache_ops = ops;
	mutex_unlock(&poolid_mutex);
	return old;
}
EXPORT_SYMBOL(cleancache_register_ops);
/* Called by a cleancache-enabled filesystem at time of mount */
void __cleancache_init_fs(struct super_block *sb)
{
	int i;

	mutex_lock(&poolid_mutex);
	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
		if (fs_poolid_map[i] == FS_UNKNOWN) {
			sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET;
			if (cleancache_ops)
				fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE);
			else
				fs_poolid_map[i] = FS_NO_BACKEND;
			break;
		}
	}
	mutex_unlock(&poolid_mutex);
}
EXPORT_SYMBOL(__cleancache_init_fs);
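/*
 * Illustrative sketch (not part of this file): a filesystem opts in by
 * calling the cleancache_init_fs() wrapper from include/linux/cleancache.h
 * during mount, which ends up in __cleancache_init_fs() above and assigns
 * sb->cleancache_poolid. The "examplefs_*" names are hypothetical.
 *
 *	static int examplefs_fill_super(struct super_block *sb,
 *					void *data, int silent)
 *	{
 *		...
 *		sb->s_op = &examplefs_super_ops;
 *		cleancache_init_fs(sb);
 *		...
 *		return 0;
 *	}
 */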
/* Called by a cleancache-enabled clustered filesystem at time of mount */
void __cleancache_init_shared_fs(char *uuid, struct super_block *sb)
{
	int i;

	mutex_lock(&poolid_mutex);
	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
		if (shared_fs_poolid_map[i] == FS_UNKNOWN) {
			sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET;
			uuids[i] = uuid;
			if (cleancache_ops)
				shared_fs_poolid_map[i] = cleancache_ops->init_shared_fs
						(uuid, PAGE_SIZE);
			else
				shared_fs_poolid_map[i] = FS_NO_BACKEND;
			break;
		}
	}
	mutex_unlock(&poolid_mutex);
}
EXPORT_SYMBOL(__cleancache_init_shared_fs);
/*
 * If the filesystem uses exportable filehandles, use the filehandle as
 * the key, else use the inode number.
 */
static int cleancache_get_key(struct inode *inode,
			      struct cleancache_filekey *key)
{
	int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *);
	int len = 0, maxlen = CLEANCACHE_KEY_MAX;
	struct super_block *sb = inode->i_sb;

	key->u.ino = inode->i_ino;
	if (sb->s_export_op != NULL) {
		fhfn = sb->s_export_op->encode_fh;
		if (fhfn) {
			len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL);
			if (len <= FILEID_ROOT || len == FILEID_INVALID)
				return -1;
			if (maxlen > CLEANCACHE_KEY_MAX)
				return -1;
		}
	}
	return 0;
}
/*
 * Returns a pool_id that is associated with a given fake poolid.
 */
static int get_poolid_from_fake(int fake_pool_id)
{
	if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET)
		return shared_fs_poolid_map[fake_pool_id -
			FAKE_SHARED_FS_POOLID_OFFSET];
	else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET)
		return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET];
	return FS_NO_BACKEND;
}
/*
 * "Get" data from cleancache associated with the poolid/inode/index
 * that were specified when the data was put to cleancache and, if
 * successful, use it to fill the specified page with data and return 0.
 * If the get fails, the pageframe is left unchanged and -1 is returned.
 * Page must be locked by caller.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
int __cleancache_get_page(struct page *page)
{
	int ret = -1;
	int pool_id;
	int fake_pool_id;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops) {
		cleancache_failed_gets++;
		goto out;
	}

	VM_BUG_ON(!PageLocked(page));
	fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (fake_pool_id < 0)
		goto out;
	pool_id = get_poolid_from_fake(fake_pool_id);

	if (cleancache_get_key(page->mapping->host, &key) < 0)
		goto out;

	if (pool_id >= 0)
		ret = cleancache_ops->get_page(pool_id,
				key, page->index, page);
	if (ret == 0)
		cleancache_succ_gets++;
	else
		cleancache_failed_gets++;
out:
	return ret;
}
EXPORT_SYMBOL(__cleancache_get_page);
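/*
 * Illustrative sketch (not part of this file): a read path is expected to
 * try cleancache before issuing disk I/O, via the cleancache_get_page()
 * wrapper from include/linux/cleancache.h (in mainline this happens in the
 * generic mpage read code). The "examplefs_*" helpers are hypothetical.
 *
 *	static int examplefs_readpage(struct file *file, struct page *page)
 *	{
 *		if (cleancache_get_page(page) == 0) {
 *			SetPageUptodate(page);
 *			unlock_page(page);
 *			return 0;	(hit: page was filled by the backend)
 *		}
 *		return examplefs_read_block_from_disk(file, page);
 *	}
 */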
/*
 * "Put" data from a page to cleancache and associate it with the
 * (previously-obtained per-filesystem) poolid and the page's inode
 * and page index. Page must be locked. Note that a put_page
 * always "succeeds", though a subsequent get_page may succeed or fail.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
void __cleancache_put_page(struct page *page)
{
	int pool_id;
	int fake_pool_id;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops) {
		cleancache_puts++;
		return;
	}

	VM_BUG_ON(!PageLocked(page));
	fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
	if (fake_pool_id < 0)
		return;

	pool_id = get_poolid_from_fake(fake_pool_id);

	if (pool_id >= 0 &&
		cleancache_get_key(page->mapping->host, &key) >= 0) {
		cleancache_ops->put_page(pool_id, key, page->index, page);
		cleancache_puts++;
	}
}
EXPORT_SYMBOL(__cleancache_put_page);
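/*
 * Illustrative sketch (not part of this file): a "put" is expected when an
 * up-to-date clean page is being dropped from the page cache, so that a
 * later read can be satisfied by a "get". This mirrors what the page-cache
 * removal path does through the cleancache_put_page() /
 * cleancache_invalidate_page() wrappers; the helper below is hypothetical.
 *
 *	static void example_remove_from_page_cache(struct page *page)
 *	{
 *		struct address_space *mapping = page->mapping;
 *
 *		if (PageUptodate(page) && mapping)
 *			cleancache_put_page(page);
 *		else
 *			cleancache_invalidate_page(mapping, page);
 *		... actual removal from the page cache ...
 *	}
 */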
/*
 * Invalidate any data from cleancache associated with the poolid and the
 * page's inode and page index so that a subsequent "get" will fail.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
void __cleancache_invalidate_page(struct address_space *mapping,
					struct page *page)
{
	/* careful... page->mapping is NULL sometimes when this is called */
	int pool_id;
	int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops)
		return;

	if (fake_pool_id >= 0) {
		pool_id = get_poolid_from_fake(fake_pool_id);
		if (pool_id < 0)
			return;

		VM_BUG_ON(!PageLocked(page));
		if (cleancache_get_key(mapping->host, &key) >= 0) {
			cleancache_ops->invalidate_page(pool_id,
					key, page->index);
			cleancache_invalidates++;
		}
	}
}
EXPORT_SYMBOL(__cleancache_invalidate_page);
/*
 * Invalidate all data from cleancache associated with the poolid and the
 * mapping's inode so that all subsequent gets to this poolid/inode
 * will fail.
 *
 * The function has two checks before any action is taken - whether
 * a backend is registered and whether the sb->cleancache_poolid
 * is correct.
 */
void __cleancache_invalidate_inode(struct address_space *mapping)
{
	int pool_id;
	int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
	struct cleancache_filekey key = { .u.key = { 0 } };

	if (!cleancache_ops)
		return;

	if (fake_pool_id < 0)
		return;

	pool_id = get_poolid_from_fake(fake_pool_id);
	if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
		cleancache_ops->invalidate_inode(pool_id, key);
}
EXPORT_SYMBOL(__cleancache_invalidate_inode);
/*
 * Called by any cleancache-enabled filesystem at time of unmount;
 * note that pool_id is surrendered and may be returned by a subsequent
 * cleancache_init_fs or cleancache_init_shared_fs.
 */
void __cleancache_invalidate_fs(struct super_block *sb)
{
	int index;
	int fake_pool_id = sb->cleancache_poolid;
	int old_poolid = fake_pool_id;

	mutex_lock(&poolid_mutex);
	if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) {
		index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET;
		old_poolid = shared_fs_poolid_map[index];
		shared_fs_poolid_map[index] = FS_UNKNOWN;
		uuids[index] = NULL;
	} else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) {
		index = fake_pool_id - FAKE_FS_POOLID_OFFSET;
		old_poolid = fs_poolid_map[index];
		fs_poolid_map[index] = FS_UNKNOWN;
	}
	sb->cleancache_poolid = -1;
	if (cleancache_ops)
		cleancache_ops->invalidate_fs(old_poolid);
	mutex_unlock(&poolid_mutex);
}
EXPORT_SYMBOL(__cleancache_invalidate_fs);
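/*
 * Illustrative sketch (not part of this file): at unmount, once all I/O on
 * the filesystem has completed, the cleancache_invalidate_fs() wrapper
 * should be called so the backend drops the whole pool and the fake poolid
 * slot can be reused (see the ordering note in the comment block near the
 * top of this file). The "examplefs_*" names and the put_super placement
 * are hypothetical.
 *
 *	static void examplefs_put_super(struct super_block *sb)
 *	{
 *		... flush and wait for all outstanding I/O ...
 *		cleancache_invalidate_fs(sb);
 *		... release examplefs-private state ...
 *	}
 */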
static int __init init_cleancache(void)
{
	int i;

#ifdef CONFIG_DEBUG_FS
	struct dentry *root = debugfs_create_dir("cleancache", NULL);
	if (root == NULL)
		return -ENXIO;
	debugfs_create_u64("succ_gets", S_IRUGO, root, &cleancache_succ_gets);
	debugfs_create_u64("failed_gets", S_IRUGO,
				root, &cleancache_failed_gets);
	debugfs_create_u64("puts", S_IRUGO, root, &cleancache_puts);
	debugfs_create_u64("invalidates", S_IRUGO,
				root, &cleancache_invalidates);
#endif
	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
		fs_poolid_map[i] = FS_UNKNOWN;
		shared_fs_poolid_map[i] = FS_UNKNOWN;
	}
	return 0;
}
module_init(init_cleancache)