localalloc.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * localalloc.c
  5. *
  6. * Node local data allocation
  7. *
  8. * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2 of the License, or (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public
  21. * License along with this program; if not, write to the
  22. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23. * Boston, MA 02111-1307, USA.
  24. */
  25. #include <linux/fs.h>
  26. #include <linux/types.h>
  27. #include <linux/slab.h>
  28. #include <linux/highmem.h>
  29. #include <linux/bitops.h>
  30. #include <linux/debugfs.h>
  31. #define MLOG_MASK_PREFIX ML_DISK_ALLOC
  32. #include <cluster/masklog.h>
  33. #include "ocfs2.h"
  34. #include "alloc.h"
  35. #include "dlmglue.h"
  36. #include "inode.h"
  37. #include "journal.h"
  38. #include "localalloc.h"
  39. #include "suballoc.h"
  40. #include "super.h"
  41. #include "sysfile.h"
  42. #include "buffer_head_io.h"
/* Locate the local alloc descriptor (id2.i_lab) embedded in an on-disk inode. */
#define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))

/*
 * Forward declarations of file-local helpers that are referenced
 * before their definitions.
 */
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
					     struct ocfs2_dinode *alloc,
					     u32 numbits);
static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
				    handle_t *handle,
				    struct ocfs2_dinode *alloc,
				    struct inode *main_bm_inode,
				    struct buffer_head *main_bm_bh);
static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
						struct ocfs2_alloc_context **ac,
						struct inode **bitmap_inode,
						struct buffer_head **bitmap_bh);
static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
					handle_t *handle,
					struct ocfs2_alloc_context *ac);
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
					  struct inode *local_alloc_inode);
  63. #ifdef CONFIG_OCFS2_FS_STATS
/*
 * Held by ocfs2_la_debug_read() while it formats the stats into the
 * per-superblock debug buffer and copies them out.
 */
DEFINE_MUTEX(la_debug_mutex);
/*
 * debugfs open handler: make the pointer stored in inode->i_private
 * available to later file operations through file->private_data.
 * Always returns 0.
 */
static int ocfs2_la_debug_open(struct inode *inode, struct file *file)
{
	file->private_data = inode->i_private;
	return 0;
}
/* Size of the buffer the debugfs read handler formats its output into. */
#define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE
/* Emitted as the first field of the debugfs output line. */
#define LA_DEBUG_VER 1
  72. static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf,
  73. size_t count, loff_t *ppos)
  74. {
  75. struct ocfs2_super *osb = file->private_data;
  76. int written, ret;
  77. char *buf = osb->local_alloc_debug_buf;
  78. mutex_lock(&la_debug_mutex);
  79. memset(buf, 0, LA_DEBUG_BUF_SZ);
  80. written = snprintf(buf, LA_DEBUG_BUF_SZ,
  81. "0x%x\t0x%llx\t%u\t%u\t0x%x\n",
  82. LA_DEBUG_VER,
  83. (unsigned long long)osb->la_last_gd,
  84. osb->local_alloc_default_bits,
  85. osb->local_alloc_bits, osb->local_alloc_state);
  86. ret = simple_read_from_buffer(userbuf, count, ppos, buf, written);
  87. mutex_unlock(&la_debug_mutex);
  88. return ret;
  89. }
/* File operations installed on the "local_alloc_stats" debugfs file. */
static const struct file_operations ocfs2_la_debug_fops = {
	.open = ocfs2_la_debug_open,
	.read = ocfs2_la_debug_read,
};
  94. static void ocfs2_init_la_debug(struct ocfs2_super *osb)
  95. {
  96. osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS);
  97. if (!osb->local_alloc_debug_buf)
  98. return;
  99. osb->local_alloc_debug = debugfs_create_file("local_alloc_stats",
  100. S_IFREG|S_IRUSR,
  101. osb->osb_debug_root,
  102. osb,
  103. &ocfs2_la_debug_fops);
  104. if (!osb->local_alloc_debug) {
  105. kfree(osb->local_alloc_debug_buf);
  106. osb->local_alloc_debug_buf = NULL;
  107. }
  108. }
  109. static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
  110. {
  111. if (osb->local_alloc_debug)
  112. debugfs_remove(osb->local_alloc_debug);
  113. if (osb->local_alloc_debug_buf)
  114. kfree(osb->local_alloc_debug_buf);
  115. osb->local_alloc_debug_buf = NULL;
  116. osb->local_alloc_debug = NULL;
  117. }
  118. #else /* CONFIG_OCFS2_FS_STATS */
  119. static void ocfs2_init_la_debug(struct ocfs2_super *osb)
  120. {
  121. return;
  122. }
  123. static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
  124. {
  125. return;
  126. }
  127. #endif
  128. static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
  129. {
  130. return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
  131. osb->local_alloc_state == OCFS2_LA_ENABLED);
  132. }
  133. void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
  134. unsigned int num_clusters)
  135. {
  136. spin_lock(&osb->osb_lock);
  137. if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
  138. osb->local_alloc_state == OCFS2_LA_THROTTLED)
  139. if (num_clusters >= osb->local_alloc_default_bits) {
  140. cancel_delayed_work(&osb->la_enable_wq);
  141. osb->local_alloc_state = OCFS2_LA_ENABLED;
  142. }
  143. spin_unlock(&osb->osb_lock);
  144. }
  145. void ocfs2_la_enable_worker(struct work_struct *work)
  146. {
  147. struct ocfs2_super *osb =
  148. container_of(work, struct ocfs2_super,
  149. la_enable_wq.work);
  150. spin_lock(&osb->osb_lock);
  151. osb->local_alloc_state = OCFS2_LA_ENABLED;
  152. spin_unlock(&osb->osb_lock);
  153. }
  154. /*
  155. * Tell us whether a given allocation should use the local alloc
  156. * file. Otherwise, it has to go to the main bitmap.
  157. *
  158. * This function does semi-dirty reads of local alloc size and state!
  159. * This is ok however, as the values are re-checked once under mutex.
  160. */
  161. int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
  162. {
  163. int ret = 0;
  164. int la_bits;
  165. spin_lock(&osb->osb_lock);
  166. la_bits = osb->local_alloc_bits;
  167. if (!ocfs2_la_state_enabled(osb))
  168. goto bail;
  169. /* la_bits should be at least twice the size (in clusters) of
  170. * a new block group. We want to be sure block group
  171. * allocations go through the local alloc, so allow an
  172. * allocation to take up to half the bitmap. */
  173. if (bits > (la_bits / 2))
  174. goto bail;
  175. ret = 1;
  176. bail:
  177. mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
  178. osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
  179. spin_unlock(&osb->osb_lock);
  180. return ret;
  181. }
  182. int ocfs2_load_local_alloc(struct ocfs2_super *osb)
  183. {
  184. int status = 0;
  185. struct ocfs2_dinode *alloc = NULL;
  186. struct buffer_head *alloc_bh = NULL;
  187. u32 num_used;
  188. struct inode *inode = NULL;
  189. struct ocfs2_local_alloc *la;
  190. mlog_entry_void();
  191. ocfs2_init_la_debug(osb);
  192. if (osb->local_alloc_bits == 0)
  193. goto bail;
  194. if (osb->local_alloc_bits >= osb->bitmap_cpg) {
  195. mlog(ML_NOTICE, "Requested local alloc window %d is larger "
  196. "than max possible %u. Using defaults.\n",
  197. osb->local_alloc_bits, (osb->bitmap_cpg - 1));
  198. osb->local_alloc_bits =
  199. ocfs2_megabytes_to_clusters(osb->sb,
  200. OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
  201. }
  202. /* read the alloc off disk */
  203. inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
  204. osb->slot_num);
  205. if (!inode) {
  206. status = -EINVAL;
  207. mlog_errno(status);
  208. goto bail;
  209. }
  210. status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
  211. &alloc_bh, 0, inode);
  212. if (status < 0) {
  213. mlog_errno(status);
  214. goto bail;
  215. }
  216. alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
  217. la = OCFS2_LOCAL_ALLOC(alloc);
  218. if (!(le32_to_cpu(alloc->i_flags) &
  219. (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
  220. mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
  221. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  222. status = -EINVAL;
  223. goto bail;
  224. }
  225. if ((la->la_size == 0) ||
  226. (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
  227. mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
  228. le16_to_cpu(la->la_size));
  229. status = -EINVAL;
  230. goto bail;
  231. }
  232. /* do a little verification. */
  233. num_used = ocfs2_local_alloc_count_bits(alloc);
  234. /* hopefully the local alloc has always been recovered before
  235. * we load it. */
  236. if (num_used
  237. || alloc->id1.bitmap1.i_used
  238. || alloc->id1.bitmap1.i_total
  239. || la->la_bm_off)
  240. mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
  241. "found = %u, set = %u, taken = %u, off = %u\n",
  242. num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
  243. le32_to_cpu(alloc->id1.bitmap1.i_total),
  244. OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
  245. osb->local_alloc_bh = alloc_bh;
  246. osb->local_alloc_state = OCFS2_LA_ENABLED;
  247. bail:
  248. if (status < 0)
  249. if (alloc_bh)
  250. brelse(alloc_bh);
  251. if (inode)
  252. iput(inode);
  253. if (status < 0)
  254. ocfs2_shutdown_la_debug(osb);
  255. mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
  256. mlog_exit(status);
  257. return status;
  258. }
  259. /*
  260. * return any unused bits to the bitmap and write out a clean
  261. * local_alloc.
  262. *
  263. * local_alloc_bh is optional. If not passed, we will simply use the
  264. * one off osb. If you do pass it however, be warned that it *will* be
  265. * returned brelse'd and NULL'd out.*/
  266. void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
  267. {
  268. int status;
  269. handle_t *handle;
  270. struct inode *local_alloc_inode = NULL;
  271. struct buffer_head *bh = NULL;
  272. struct buffer_head *main_bm_bh = NULL;
  273. struct inode *main_bm_inode = NULL;
  274. struct ocfs2_dinode *alloc_copy = NULL;
  275. struct ocfs2_dinode *alloc = NULL;
  276. mlog_entry_void();
  277. cancel_delayed_work(&osb->la_enable_wq);
  278. flush_workqueue(ocfs2_wq);
  279. ocfs2_shutdown_la_debug(osb);
  280. if (osb->local_alloc_state == OCFS2_LA_UNUSED)
  281. goto out;
  282. local_alloc_inode =
  283. ocfs2_get_system_file_inode(osb,
  284. LOCAL_ALLOC_SYSTEM_INODE,
  285. osb->slot_num);
  286. if (!local_alloc_inode) {
  287. status = -ENOENT;
  288. mlog_errno(status);
  289. goto out;
  290. }
  291. osb->local_alloc_state = OCFS2_LA_DISABLED;
  292. main_bm_inode = ocfs2_get_system_file_inode(osb,
  293. GLOBAL_BITMAP_SYSTEM_INODE,
  294. OCFS2_INVALID_SLOT);
  295. if (!main_bm_inode) {
  296. status = -EINVAL;
  297. mlog_errno(status);
  298. goto out;
  299. }
  300. mutex_lock(&main_bm_inode->i_mutex);
  301. status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
  302. if (status < 0) {
  303. mlog_errno(status);
  304. goto out_mutex;
  305. }
  306. /* WINDOW_MOVE_CREDITS is a bit heavy... */
  307. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  308. if (IS_ERR(handle)) {
  309. mlog_errno(PTR_ERR(handle));
  310. handle = NULL;
  311. goto out_unlock;
  312. }
  313. bh = osb->local_alloc_bh;
  314. alloc = (struct ocfs2_dinode *) bh->b_data;
  315. alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
  316. if (!alloc_copy) {
  317. status = -ENOMEM;
  318. goto out_commit;
  319. }
  320. memcpy(alloc_copy, alloc, bh->b_size);
  321. status = ocfs2_journal_access(handle, local_alloc_inode, bh,
  322. OCFS2_JOURNAL_ACCESS_WRITE);
  323. if (status < 0) {
  324. mlog_errno(status);
  325. goto out_commit;
  326. }
  327. ocfs2_clear_local_alloc(alloc);
  328. status = ocfs2_journal_dirty(handle, bh);
  329. if (status < 0) {
  330. mlog_errno(status);
  331. goto out_commit;
  332. }
  333. brelse(bh);
  334. osb->local_alloc_bh = NULL;
  335. osb->local_alloc_state = OCFS2_LA_UNUSED;
  336. status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
  337. main_bm_inode, main_bm_bh);
  338. if (status < 0)
  339. mlog_errno(status);
  340. out_commit:
  341. ocfs2_commit_trans(osb, handle);
  342. out_unlock:
  343. if (main_bm_bh)
  344. brelse(main_bm_bh);
  345. ocfs2_inode_unlock(main_bm_inode, 1);
  346. out_mutex:
  347. mutex_unlock(&main_bm_inode->i_mutex);
  348. iput(main_bm_inode);
  349. out:
  350. if (local_alloc_inode)
  351. iput(local_alloc_inode);
  352. if (alloc_copy)
  353. kfree(alloc_copy);
  354. mlog_exit_void();
  355. }
  356. /*
  357. * We want to free the bitmap bits outside of any recovery context as
  358. * we'll need a cluster lock to do so, but we must clear the local
  359. * alloc before giving up the recovered nodes journal. To solve this,
  360. * we kmalloc a copy of the local alloc before it's change for the
  361. * caller to process with ocfs2_complete_local_alloc_recovery
  362. */
  363. int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
  364. int slot_num,
  365. struct ocfs2_dinode **alloc_copy)
  366. {
  367. int status = 0;
  368. struct buffer_head *alloc_bh = NULL;
  369. struct inode *inode = NULL;
  370. struct ocfs2_dinode *alloc;
  371. mlog_entry("(slot_num = %d)\n", slot_num);
  372. *alloc_copy = NULL;
  373. inode = ocfs2_get_system_file_inode(osb,
  374. LOCAL_ALLOC_SYSTEM_INODE,
  375. slot_num);
  376. if (!inode) {
  377. status = -EINVAL;
  378. mlog_errno(status);
  379. goto bail;
  380. }
  381. mutex_lock(&inode->i_mutex);
  382. status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
  383. &alloc_bh, 0, inode);
  384. if (status < 0) {
  385. mlog_errno(status);
  386. goto bail;
  387. }
  388. *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
  389. if (!(*alloc_copy)) {
  390. status = -ENOMEM;
  391. goto bail;
  392. }
  393. memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
  394. alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
  395. ocfs2_clear_local_alloc(alloc);
  396. status = ocfs2_write_block(osb, alloc_bh, inode);
  397. if (status < 0)
  398. mlog_errno(status);
  399. bail:
  400. if ((status < 0) && (*alloc_copy)) {
  401. kfree(*alloc_copy);
  402. *alloc_copy = NULL;
  403. }
  404. if (alloc_bh)
  405. brelse(alloc_bh);
  406. if (inode) {
  407. mutex_unlock(&inode->i_mutex);
  408. iput(inode);
  409. }
  410. mlog_exit(status);
  411. return status;
  412. }
  413. /*
  414. * Step 2: By now, we've completed the journal recovery, we've stamped
  415. * a clean local alloc on disk and dropped the node out of the
  416. * recovery map. Dlm locks will no longer stall, so lets clear out the
  417. * main bitmap.
  418. */
  419. int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
  420. struct ocfs2_dinode *alloc)
  421. {
  422. int status;
  423. handle_t *handle;
  424. struct buffer_head *main_bm_bh = NULL;
  425. struct inode *main_bm_inode;
  426. mlog_entry_void();
  427. main_bm_inode = ocfs2_get_system_file_inode(osb,
  428. GLOBAL_BITMAP_SYSTEM_INODE,
  429. OCFS2_INVALID_SLOT);
  430. if (!main_bm_inode) {
  431. status = -EINVAL;
  432. mlog_errno(status);
  433. goto out;
  434. }
  435. mutex_lock(&main_bm_inode->i_mutex);
  436. status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
  437. if (status < 0) {
  438. mlog_errno(status);
  439. goto out_mutex;
  440. }
  441. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  442. if (IS_ERR(handle)) {
  443. status = PTR_ERR(handle);
  444. handle = NULL;
  445. mlog_errno(status);
  446. goto out_unlock;
  447. }
  448. /* we want the bitmap change to be recorded on disk asap */
  449. handle->h_sync = 1;
  450. status = ocfs2_sync_local_to_main(osb, handle, alloc,
  451. main_bm_inode, main_bm_bh);
  452. if (status < 0)
  453. mlog_errno(status);
  454. ocfs2_commit_trans(osb, handle);
  455. out_unlock:
  456. ocfs2_inode_unlock(main_bm_inode, 1);
  457. out_mutex:
  458. mutex_unlock(&main_bm_inode->i_mutex);
  459. if (main_bm_bh)
  460. brelse(main_bm_bh);
  461. iput(main_bm_inode);
  462. out:
  463. if (!status)
  464. ocfs2_init_inode_steal_slot(osb);
  465. mlog_exit(status);
  466. return status;
  467. }
  468. /*
  469. * make sure we've got at least bits_wanted contiguous bits in the
  470. * local alloc. You lose them when you drop i_mutex.
  471. *
  472. * We will add ourselves to the transaction passed in, but may start
  473. * our own in order to shift windows.
  474. */
  475. int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
  476. u32 bits_wanted,
  477. struct ocfs2_alloc_context *ac)
  478. {
  479. int status;
  480. struct ocfs2_dinode *alloc;
  481. struct inode *local_alloc_inode;
  482. unsigned int free_bits;
  483. mlog_entry_void();
  484. BUG_ON(!ac);
  485. local_alloc_inode =
  486. ocfs2_get_system_file_inode(osb,
  487. LOCAL_ALLOC_SYSTEM_INODE,
  488. osb->slot_num);
  489. if (!local_alloc_inode) {
  490. status = -ENOENT;
  491. mlog_errno(status);
  492. goto bail;
  493. }
  494. mutex_lock(&local_alloc_inode->i_mutex);
  495. /*
  496. * We must double check state and allocator bits because
  497. * another process may have changed them while holding i_mutex.
  498. */
  499. spin_lock(&osb->osb_lock);
  500. if (!ocfs2_la_state_enabled(osb) ||
  501. (bits_wanted > osb->local_alloc_bits)) {
  502. spin_unlock(&osb->osb_lock);
  503. status = -ENOSPC;
  504. goto bail;
  505. }
  506. spin_unlock(&osb->osb_lock);
  507. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  508. #ifdef CONFIG_OCFS2_DEBUG_FS
  509. if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
  510. ocfs2_local_alloc_count_bits(alloc)) {
  511. ocfs2_error(osb->sb, "local alloc inode %llu says it has "
  512. "%u free bits, but a count shows %u",
  513. (unsigned long long)le64_to_cpu(alloc->i_blkno),
  514. le32_to_cpu(alloc->id1.bitmap1.i_used),
  515. ocfs2_local_alloc_count_bits(alloc));
  516. status = -EIO;
  517. goto bail;
  518. }
  519. #endif
  520. free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
  521. le32_to_cpu(alloc->id1.bitmap1.i_used);
  522. if (bits_wanted > free_bits) {
  523. /* uhoh, window change time. */
  524. status =
  525. ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
  526. if (status < 0) {
  527. if (status != -ENOSPC)
  528. mlog_errno(status);
  529. goto bail;
  530. }
  531. /*
  532. * Under certain conditions, the window slide code
  533. * might have reduced the number of bits available or
  534. * disabled the the local alloc entirely. Re-check
  535. * here and return -ENOSPC if necessary.
  536. */
  537. status = -ENOSPC;
  538. if (!ocfs2_la_state_enabled(osb))
  539. goto bail;
  540. free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
  541. le32_to_cpu(alloc->id1.bitmap1.i_used);
  542. if (bits_wanted > free_bits)
  543. goto bail;
  544. }
  545. ac->ac_inode = local_alloc_inode;
  546. /* We should never use localalloc from another slot */
  547. ac->ac_alloc_slot = osb->slot_num;
  548. ac->ac_which = OCFS2_AC_USE_LOCAL;
  549. get_bh(osb->local_alloc_bh);
  550. ac->ac_bh = osb->local_alloc_bh;
  551. status = 0;
  552. bail:
  553. if (status < 0 && local_alloc_inode) {
  554. mutex_unlock(&local_alloc_inode->i_mutex);
  555. iput(local_alloc_inode);
  556. }
  557. mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
  558. status);
  559. mlog_exit(status);
  560. return status;
  561. }
  562. int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
  563. handle_t *handle,
  564. struct ocfs2_alloc_context *ac,
  565. u32 bits_wanted,
  566. u32 *bit_off,
  567. u32 *num_bits)
  568. {
  569. int status, start;
  570. struct inode *local_alloc_inode;
  571. void *bitmap;
  572. struct ocfs2_dinode *alloc;
  573. struct ocfs2_local_alloc *la;
  574. mlog_entry_void();
  575. BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
  576. local_alloc_inode = ac->ac_inode;
  577. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  578. la = OCFS2_LOCAL_ALLOC(alloc);
  579. start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
  580. if (start == -1) {
  581. /* TODO: Shouldn't we just BUG here? */
  582. status = -ENOSPC;
  583. mlog_errno(status);
  584. goto bail;
  585. }
  586. bitmap = la->la_bitmap;
  587. *bit_off = le32_to_cpu(la->la_bm_off) + start;
  588. /* local alloc is always contiguous by nature -- we never
  589. * delete bits from it! */
  590. *num_bits = bits_wanted;
  591. status = ocfs2_journal_access(handle, local_alloc_inode,
  592. osb->local_alloc_bh,
  593. OCFS2_JOURNAL_ACCESS_WRITE);
  594. if (status < 0) {
  595. mlog_errno(status);
  596. goto bail;
  597. }
  598. while(bits_wanted--)
  599. ocfs2_set_bit(start++, bitmap);
  600. le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
  601. status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
  602. if (status < 0) {
  603. mlog_errno(status);
  604. goto bail;
  605. }
  606. status = 0;
  607. bail:
  608. mlog_exit(status);
  609. return status;
  610. }
  611. static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
  612. {
  613. int i;
  614. u8 *buffer;
  615. u32 count = 0;
  616. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  617. mlog_entry_void();
  618. buffer = la->la_bitmap;
  619. for (i = 0; i < le16_to_cpu(la->la_size); i++)
  620. count += hweight8(buffer[i]);
  621. mlog_exit(count);
  622. return count;
  623. }
  624. static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
  625. struct ocfs2_dinode *alloc,
  626. u32 numbits)
  627. {
  628. int numfound, bitoff, left, startoff, lastzero;
  629. void *bitmap = NULL;
  630. mlog_entry("(numbits wanted = %u)\n", numbits);
  631. if (!alloc->id1.bitmap1.i_total) {
  632. mlog(0, "No bits in my window!\n");
  633. bitoff = -1;
  634. goto bail;
  635. }
  636. bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
  637. numfound = bitoff = startoff = 0;
  638. lastzero = -1;
  639. left = le32_to_cpu(alloc->id1.bitmap1.i_total);
  640. while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
  641. if (bitoff == left) {
  642. /* mlog(0, "bitoff (%d) == left", bitoff); */
  643. break;
  644. }
  645. /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
  646. "numfound = %d\n", bitoff, startoff, numfound);*/
  647. /* Ok, we found a zero bit... is it contig. or do we
  648. * start over?*/
  649. if (bitoff == startoff) {
  650. /* we found a zero */
  651. numfound++;
  652. startoff++;
  653. } else {
  654. /* got a zero after some ones */
  655. numfound = 1;
  656. startoff = bitoff+1;
  657. }
  658. /* we got everything we needed */
  659. if (numfound == numbits) {
  660. /* mlog(0, "Found it all!\n"); */
  661. break;
  662. }
  663. }
  664. mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
  665. numfound);
  666. if (numfound == numbits)
  667. bitoff = startoff - numfound;
  668. else
  669. bitoff = -1;
  670. bail:
  671. mlog_exit(bitoff);
  672. return bitoff;
  673. }
  674. static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
  675. {
  676. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  677. int i;
  678. mlog_entry_void();
  679. alloc->id1.bitmap1.i_total = 0;
  680. alloc->id1.bitmap1.i_used = 0;
  681. la->la_bm_off = 0;
  682. for(i = 0; i < le16_to_cpu(la->la_size); i++)
  683. la->la_bitmap[i] = 0;
  684. mlog_exit_void();
  685. }
  686. #if 0
/*
 * Turn this on and uncomment below to aid debugging window shifts.
 *
 * Checks that the 'count' bits starting at bit 'start' of 'bitmap' are
 * all clear; prints the offending range and bit and calls BUG() on the
 * first set bit found (scanning from the end of the range backwards).
 */
static void ocfs2_verify_zero_bits(unsigned long *bitmap,
				   unsigned int start,
				   unsigned int count)
{
	unsigned int tmp = count;
	while(tmp--) {
		if (ocfs2_test_bit(start + tmp, bitmap)) {
			printk("ocfs2_verify_zero_bits: start = %u, count = "
			       "%u\n", start, count);
			printk("ocfs2_verify_zero_bits: bit %u is set!",
			       start + tmp);
			BUG();
		}
	}
}
  703. #endif
/*
 * Sync the local alloc to the main bitmap: every run of clear (unused)
 * bits inside the window described by 'alloc' is handed to
 * ocfs2_free_clusters() on the main bitmap.
 *
 * Assumes you've already locked the main bitmap -- the bitmap inode
 * passed is used for caching.
 *
 * Returns 0 (also when the window is empty or fully used) or the first
 * negative status returned by ocfs2_free_clusters().
 */
static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
				    handle_t *handle,
				    struct ocfs2_dinode *alloc,
				    struct inode *main_bm_inode,
				    struct buffer_head *main_bm_bh)
{
	int status = 0;
	int bit_off, left, count, start;
	u64 la_start_blk;
	u64 blkno;
	void *bitmap;
	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);

	mlog_entry("total = %u, used = %u\n",
		   le32_to_cpu(alloc->id1.bitmap1.i_total),
		   le32_to_cpu(alloc->id1.bitmap1.i_used));

	/* An empty window has nothing to give back. */
	if (!alloc->id1.bitmap1.i_total) {
		mlog(0, "nothing to sync!\n");
		goto bail;
	}

	/* Neither has a window whose bits were all handed out. */
	if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
	    le32_to_cpu(alloc->id1.bitmap1.i_total)) {
		mlog(0, "all bits were taken!\n");
		goto bail;
	}

	/* Block number corresponding to bit 0 of the window. */
	la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
						le32_to_cpu(la->la_bm_off));
	bitmap = la->la_bitmap;
	/*
	 * 'count' is the length of the current run of clear bits and
	 * 'start' is one past its last bit (also where the next search
	 * begins); 'left' is the number of bits in the window.
	 */
	start = count = bit_off = 0;
	left = le32_to_cpu(alloc->id1.bitmap1.i_total);

	/* NOTE(review): the break below relies on the search returning a
	 * value >= left once no clear bit remains -- confirm. */
	while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
	       != -1) {
		/* Clear bit directly after the run: extend the run. */
		if ((bit_off < left) && (bit_off == start)) {
			count++;
			start++;
			continue;
		}
		/* The run has ended (gap or end of window): free it. */
		if (count) {
			blkno = la_start_blk +
				ocfs2_clusters_to_blocks(osb->sb,
							 start - count);

			mlog(0, "freeing %u bits starting at local alloc bit "
			     "%u (la_start_blk = %llu, blkno = %llu)\n",
			     count, start - count,
			     (unsigned long long)la_start_blk,
			     (unsigned long long)blkno);

			status = ocfs2_free_clusters(handle, main_bm_inode,
						     main_bm_bh, blkno, count);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}
		}
		if (bit_off >= left)
			break;
		/* bit_off is the first bit of a new run. */
		count = 1;
		start = bit_off + 1;
	}

bail:
	mlog_exit(status);
	return status;
}
/* Events passed to ocfs2_recalc_la_window() to size the next window. */
enum ocfs2_la_event {
	OCFS2_LA_EVENT_SLIDE,		/* Normal window slide. */
	OCFS2_LA_EVENT_FRAGMENTED,	/* The global bitmap has
					 * enough bits theoretically
					 * free, but a contiguous
					 * allocation could not be
					 * found. */
	OCFS2_LA_EVENT_ENOSPC,		/* Global bitmap doesn't have
					 * enough bits free to satisfy
					 * our request. */
};

/*
 * Delay used when queueing the la_enable_wq work after the window has
 * been throttled or disabled.
 */
#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
  783. /*
  784. * Given an event, calculate the size of our next local alloc window.
  785. *
  786. * This should always be called under i_mutex of the local alloc inode
  787. * so that local alloc disabling doesn't race with processes trying to
  788. * use the allocator.
  789. *
  790. * Returns the state which the local alloc was left in. This value can
  791. * be ignored by some paths.
  792. */
  793. static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
  794. enum ocfs2_la_event event)
  795. {
  796. unsigned int bits;
  797. int state;
  798. spin_lock(&osb->osb_lock);
  799. if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
  800. WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
  801. goto out_unlock;
  802. }
  803. /*
  804. * ENOSPC and fragmentation are treated similarly for now.
  805. */
  806. if (event == OCFS2_LA_EVENT_ENOSPC ||
  807. event == OCFS2_LA_EVENT_FRAGMENTED) {
  808. /*
  809. * We ran out of contiguous space in the primary
  810. * bitmap. Drastically reduce the number of bits used
  811. * by local alloc until we have to disable it.
  812. */
  813. bits = osb->local_alloc_bits >> 1;
  814. if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
  815. /*
  816. * By setting state to THROTTLED, we'll keep
  817. * the number of local alloc bits used down
  818. * until an event occurs which would give us
  819. * reason to assume the bitmap situation might
  820. * have changed.
  821. */
  822. osb->local_alloc_state = OCFS2_LA_THROTTLED;
  823. osb->local_alloc_bits = bits;
  824. } else {
  825. osb->local_alloc_state = OCFS2_LA_DISABLED;
  826. }
  827. queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
  828. OCFS2_LA_ENABLE_INTERVAL);
  829. goto out_unlock;
  830. }
  831. /*
  832. * Don't increase the size of the local alloc window until we
  833. * know we might be able to fulfill the request. Otherwise, we
  834. * risk bouncing around the global bitmap during periods of
  835. * low space.
  836. */
  837. if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
  838. osb->local_alloc_bits = osb->local_alloc_default_bits;
  839. out_unlock:
  840. state = osb->local_alloc_state;
  841. spin_unlock(&osb->osb_lock);
  842. return state;
  843. }
  844. static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
  845. struct ocfs2_alloc_context **ac,
  846. struct inode **bitmap_inode,
  847. struct buffer_head **bitmap_bh)
  848. {
  849. int status;
  850. *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
  851. if (!(*ac)) {
  852. status = -ENOMEM;
  853. mlog_errno(status);
  854. goto bail;
  855. }
  856. retry_enospc:
  857. (*ac)->ac_bits_wanted = osb->local_alloc_bits;
  858. status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
  859. if (status == -ENOSPC) {
  860. if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
  861. OCFS2_LA_DISABLED)
  862. goto bail;
  863. ocfs2_free_ac_resource(*ac);
  864. memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
  865. goto retry_enospc;
  866. }
  867. if (status < 0) {
  868. mlog_errno(status);
  869. goto bail;
  870. }
  871. *bitmap_inode = (*ac)->ac_inode;
  872. igrab(*bitmap_inode);
  873. *bitmap_bh = (*ac)->ac_bh;
  874. get_bh(*bitmap_bh);
  875. status = 0;
  876. bail:
  877. if ((status < 0) && *ac) {
  878. ocfs2_free_alloc_context(*ac);
  879. *ac = NULL;
  880. }
  881. mlog_exit(status);
  882. return status;
  883. }
  884. /*
  885. * pass it the bitmap lock in lock_bh if you have it.
  886. */
  887. static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
  888. handle_t *handle,
  889. struct ocfs2_alloc_context *ac)
  890. {
  891. int status = 0;
  892. u32 cluster_off, cluster_count;
  893. struct ocfs2_dinode *alloc = NULL;
  894. struct ocfs2_local_alloc *la;
  895. mlog_entry_void();
  896. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  897. la = OCFS2_LOCAL_ALLOC(alloc);
  898. if (alloc->id1.bitmap1.i_total)
  899. mlog(0, "asking me to alloc a new window over a non-empty "
  900. "one\n");
  901. mlog(0, "Allocating %u clusters for a new window.\n",
  902. osb->local_alloc_bits);
  903. /* Instruct the allocation code to try the most recently used
  904. * cluster group. We'll re-record the group used this pass
  905. * below. */
  906. ac->ac_last_group = osb->la_last_gd;
  907. /* we used the generic suballoc reserve function, but we set
  908. * everything up nicely, so there's no reason why we can't use
  909. * the more specific cluster api to claim bits. */
  910. status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
  911. &cluster_off, &cluster_count);
  912. if (status == -ENOSPC) {
  913. retry_enospc:
  914. /*
  915. * Note: We could also try syncing the journal here to
  916. * allow use of any free bits which the current
  917. * transaction can't give us access to. --Mark
  918. */
  919. if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
  920. OCFS2_LA_DISABLED)
  921. goto bail;
  922. status = ocfs2_claim_clusters(osb, handle, ac,
  923. osb->local_alloc_bits,
  924. &cluster_off,
  925. &cluster_count);
  926. if (status == -ENOSPC)
  927. goto retry_enospc;
  928. /*
  929. * We only shrunk the *minimum* number of in our
  930. * request - it's entirely possible that the allocator
  931. * might give us more than we asked for.
  932. */
  933. if (status == 0) {
  934. spin_lock(&osb->osb_lock);
  935. osb->local_alloc_bits = cluster_count;
  936. spin_unlock(&osb->osb_lock);
  937. }
  938. }
  939. if (status < 0) {
  940. if (status != -ENOSPC)
  941. mlog_errno(status);
  942. goto bail;
  943. }
  944. osb->la_last_gd = ac->ac_last_group;
  945. la->la_bm_off = cpu_to_le32(cluster_off);
  946. alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
  947. /* just in case... In the future when we find space ourselves,
  948. * we don't have to get all contiguous -- but we'll have to
  949. * set all previously used bits in bitmap and update
  950. * la_bits_set before setting the bits in the main bitmap. */
  951. alloc->id1.bitmap1.i_used = 0;
  952. memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
  953. le16_to_cpu(la->la_size));
  954. mlog(0, "New window allocated:\n");
  955. mlog(0, "window la_bm_off = %u\n",
  956. OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
  957. mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
  958. bail:
  959. mlog_exit(status);
  960. return status;
  961. }
  962. /* Note that we do *NOT* lock the local alloc inode here as
  963. * it's been locked already for us. */
  964. static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
  965. struct inode *local_alloc_inode)
  966. {
  967. int status = 0;
  968. struct buffer_head *main_bm_bh = NULL;
  969. struct inode *main_bm_inode = NULL;
  970. handle_t *handle = NULL;
  971. struct ocfs2_dinode *alloc;
  972. struct ocfs2_dinode *alloc_copy = NULL;
  973. struct ocfs2_alloc_context *ac = NULL;
  974. mlog_entry_void();
  975. ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
  976. /* This will lock the main bitmap for us. */
  977. status = ocfs2_local_alloc_reserve_for_window(osb,
  978. &ac,
  979. &main_bm_inode,
  980. &main_bm_bh);
  981. if (status < 0) {
  982. if (status != -ENOSPC)
  983. mlog_errno(status);
  984. goto bail;
  985. }
  986. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  987. if (IS_ERR(handle)) {
  988. status = PTR_ERR(handle);
  989. handle = NULL;
  990. mlog_errno(status);
  991. goto bail;
  992. }
  993. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  994. /* We want to clear the local alloc before doing anything
  995. * else, so that if we error later during this operation,
  996. * local alloc shutdown won't try to double free main bitmap
  997. * bits. Make a copy so the sync function knows which bits to
  998. * free. */
  999. alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
  1000. if (!alloc_copy) {
  1001. status = -ENOMEM;
  1002. mlog_errno(status);
  1003. goto bail;
  1004. }
  1005. memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
  1006. status = ocfs2_journal_access(handle, local_alloc_inode,
  1007. osb->local_alloc_bh,
  1008. OCFS2_JOURNAL_ACCESS_WRITE);
  1009. if (status < 0) {
  1010. mlog_errno(status);
  1011. goto bail;
  1012. }
  1013. ocfs2_clear_local_alloc(alloc);
  1014. status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
  1015. if (status < 0) {
  1016. mlog_errno(status);
  1017. goto bail;
  1018. }
  1019. status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
  1020. main_bm_inode, main_bm_bh);
  1021. if (status < 0) {
  1022. mlog_errno(status);
  1023. goto bail;
  1024. }
  1025. status = ocfs2_local_alloc_new_window(osb, handle, ac);
  1026. if (status < 0) {
  1027. if (status != -ENOSPC)
  1028. mlog_errno(status);
  1029. goto bail;
  1030. }
  1031. atomic_inc(&osb->alloc_stats.moves);
  1032. status = 0;
  1033. bail:
  1034. if (handle)
  1035. ocfs2_commit_trans(osb, handle);
  1036. if (main_bm_bh)
  1037. brelse(main_bm_bh);
  1038. if (main_bm_inode)
  1039. iput(main_bm_inode);
  1040. if (alloc_copy)
  1041. kfree(alloc_copy);
  1042. if (ac)
  1043. ocfs2_free_alloc_context(ac);
  1044. mlog_exit(status);
  1045. return status;
  1046. }