localalloc.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * localalloc.c
  5. *
  6. * Node local data allocation
  7. *
  8. * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2 of the License, or (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public
  21. * License along with this program; if not, write to the
  22. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23. * Boston, MA 02111-1307, USA.
  24. */
  25. #include <linux/fs.h>
  26. #include <linux/types.h>
  27. #include <linux/slab.h>
  28. #include <linux/highmem.h>
  29. #include <linux/bitops.h>
  30. #define MLOG_MASK_PREFIX ML_DISK_ALLOC
  31. #include <cluster/masklog.h>
  32. #include "ocfs2.h"
  33. #include "alloc.h"
  34. #include "blockcheck.h"
  35. #include "dlmglue.h"
  36. #include "inode.h"
  37. #include "journal.h"
  38. #include "localalloc.h"
  39. #include "suballoc.h"
  40. #include "super.h"
  41. #include "sysfile.h"
  42. #include "buffer_head_io.h"
  43. #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
  44. static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
  45. static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
  46. struct ocfs2_dinode *alloc,
  47. u32 numbits);
  48. static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
  49. static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
  50. handle_t *handle,
  51. struct ocfs2_dinode *alloc,
  52. struct inode *main_bm_inode,
  53. struct buffer_head *main_bm_bh);
  54. static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
  55. struct ocfs2_alloc_context **ac,
  56. struct inode **bitmap_inode,
  57. struct buffer_head **bitmap_bh);
  58. static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
  59. handle_t *handle,
  60. struct ocfs2_alloc_context *ac);
  61. static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
  62. struct inode *local_alloc_inode);
  63. static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
  64. {
  65. return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
  66. osb->local_alloc_state == OCFS2_LA_ENABLED);
  67. }
  68. void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
  69. unsigned int num_clusters)
  70. {
  71. spin_lock(&osb->osb_lock);
  72. if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
  73. osb->local_alloc_state == OCFS2_LA_THROTTLED)
  74. if (num_clusters >= osb->local_alloc_default_bits) {
  75. cancel_delayed_work(&osb->la_enable_wq);
  76. osb->local_alloc_state = OCFS2_LA_ENABLED;
  77. }
  78. spin_unlock(&osb->osb_lock);
  79. }
  80. void ocfs2_la_enable_worker(struct work_struct *work)
  81. {
  82. struct ocfs2_super *osb =
  83. container_of(work, struct ocfs2_super,
  84. la_enable_wq.work);
  85. spin_lock(&osb->osb_lock);
  86. osb->local_alloc_state = OCFS2_LA_ENABLED;
  87. spin_unlock(&osb->osb_lock);
  88. }
  89. /*
  90. * Tell us whether a given allocation should use the local alloc
  91. * file. Otherwise, it has to go to the main bitmap.
  92. *
  93. * This function does semi-dirty reads of local alloc size and state!
  94. * This is ok however, as the values are re-checked once under mutex.
  95. */
  96. int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
  97. {
  98. int ret = 0;
  99. int la_bits;
  100. spin_lock(&osb->osb_lock);
  101. la_bits = osb->local_alloc_bits;
  102. if (!ocfs2_la_state_enabled(osb))
  103. goto bail;
  104. /* la_bits should be at least twice the size (in clusters) of
  105. * a new block group. We want to be sure block group
  106. * allocations go through the local alloc, so allow an
  107. * allocation to take up to half the bitmap. */
  108. if (bits > (la_bits / 2))
  109. goto bail;
  110. ret = 1;
  111. bail:
  112. mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
  113. osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
  114. spin_unlock(&osb->osb_lock);
  115. return ret;
  116. }
  117. int ocfs2_load_local_alloc(struct ocfs2_super *osb)
  118. {
  119. int status = 0;
  120. struct ocfs2_dinode *alloc = NULL;
  121. struct buffer_head *alloc_bh = NULL;
  122. u32 num_used;
  123. struct inode *inode = NULL;
  124. struct ocfs2_local_alloc *la;
  125. mlog_entry_void();
  126. if (osb->local_alloc_bits == 0)
  127. goto bail;
  128. if (osb->local_alloc_bits >= osb->bitmap_cpg) {
  129. mlog(ML_NOTICE, "Requested local alloc window %d is larger "
  130. "than max possible %u. Using defaults.\n",
  131. osb->local_alloc_bits, (osb->bitmap_cpg - 1));
  132. osb->local_alloc_bits =
  133. ocfs2_megabytes_to_clusters(osb->sb,
  134. OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
  135. }
  136. /* read the alloc off disk */
  137. inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
  138. osb->slot_num);
  139. if (!inode) {
  140. status = -EINVAL;
  141. mlog_errno(status);
  142. goto bail;
  143. }
  144. status = ocfs2_read_inode_block_full(inode, &alloc_bh,
  145. OCFS2_BH_IGNORE_CACHE);
  146. if (status < 0) {
  147. mlog_errno(status);
  148. goto bail;
  149. }
  150. alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
  151. la = OCFS2_LOCAL_ALLOC(alloc);
  152. if (!(le32_to_cpu(alloc->i_flags) &
  153. (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
  154. mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
  155. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  156. status = -EINVAL;
  157. goto bail;
  158. }
  159. if ((la->la_size == 0) ||
  160. (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
  161. mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
  162. le16_to_cpu(la->la_size));
  163. status = -EINVAL;
  164. goto bail;
  165. }
  166. /* do a little verification. */
  167. num_used = ocfs2_local_alloc_count_bits(alloc);
  168. /* hopefully the local alloc has always been recovered before
  169. * we load it. */
  170. if (num_used
  171. || alloc->id1.bitmap1.i_used
  172. || alloc->id1.bitmap1.i_total
  173. || la->la_bm_off)
  174. mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
  175. "found = %u, set = %u, taken = %u, off = %u\n",
  176. num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
  177. le32_to_cpu(alloc->id1.bitmap1.i_total),
  178. OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
  179. osb->local_alloc_bh = alloc_bh;
  180. osb->local_alloc_state = OCFS2_LA_ENABLED;
  181. bail:
  182. if (status < 0)
  183. brelse(alloc_bh);
  184. if (inode)
  185. iput(inode);
  186. mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
  187. mlog_exit(status);
  188. return status;
  189. }
  190. /*
  191. * return any unused bits to the bitmap and write out a clean
  192. * local_alloc.
  193. *
  194. * local_alloc_bh is optional. If not passed, we will simply use the
  195. * one off osb. If you do pass it however, be warned that it *will* be
  196. * returned brelse'd and NULL'd out.*/
  197. void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
  198. {
  199. int status;
  200. handle_t *handle;
  201. struct inode *local_alloc_inode = NULL;
  202. struct buffer_head *bh = NULL;
  203. struct buffer_head *main_bm_bh = NULL;
  204. struct inode *main_bm_inode = NULL;
  205. struct ocfs2_dinode *alloc_copy = NULL;
  206. struct ocfs2_dinode *alloc = NULL;
  207. mlog_entry_void();
  208. cancel_delayed_work(&osb->la_enable_wq);
  209. flush_workqueue(ocfs2_wq);
  210. if (osb->local_alloc_state == OCFS2_LA_UNUSED)
  211. goto out;
  212. local_alloc_inode =
  213. ocfs2_get_system_file_inode(osb,
  214. LOCAL_ALLOC_SYSTEM_INODE,
  215. osb->slot_num);
  216. if (!local_alloc_inode) {
  217. status = -ENOENT;
  218. mlog_errno(status);
  219. goto out;
  220. }
  221. osb->local_alloc_state = OCFS2_LA_DISABLED;
  222. main_bm_inode = ocfs2_get_system_file_inode(osb,
  223. GLOBAL_BITMAP_SYSTEM_INODE,
  224. OCFS2_INVALID_SLOT);
  225. if (!main_bm_inode) {
  226. status = -EINVAL;
  227. mlog_errno(status);
  228. goto out;
  229. }
  230. mutex_lock(&main_bm_inode->i_mutex);
  231. status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
  232. if (status < 0) {
  233. mlog_errno(status);
  234. goto out_mutex;
  235. }
  236. /* WINDOW_MOVE_CREDITS is a bit heavy... */
  237. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  238. if (IS_ERR(handle)) {
  239. mlog_errno(PTR_ERR(handle));
  240. handle = NULL;
  241. goto out_unlock;
  242. }
  243. bh = osb->local_alloc_bh;
  244. alloc = (struct ocfs2_dinode *) bh->b_data;
  245. alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
  246. if (!alloc_copy) {
  247. status = -ENOMEM;
  248. goto out_commit;
  249. }
  250. memcpy(alloc_copy, alloc, bh->b_size);
  251. status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
  252. bh, OCFS2_JOURNAL_ACCESS_WRITE);
  253. if (status < 0) {
  254. mlog_errno(status);
  255. goto out_commit;
  256. }
  257. ocfs2_clear_local_alloc(alloc);
  258. status = ocfs2_journal_dirty(handle, bh);
  259. if (status < 0) {
  260. mlog_errno(status);
  261. goto out_commit;
  262. }
  263. brelse(bh);
  264. osb->local_alloc_bh = NULL;
  265. osb->local_alloc_state = OCFS2_LA_UNUSED;
  266. status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
  267. main_bm_inode, main_bm_bh);
  268. if (status < 0)
  269. mlog_errno(status);
  270. out_commit:
  271. ocfs2_commit_trans(osb, handle);
  272. out_unlock:
  273. brelse(main_bm_bh);
  274. ocfs2_inode_unlock(main_bm_inode, 1);
  275. out_mutex:
  276. mutex_unlock(&main_bm_inode->i_mutex);
  277. iput(main_bm_inode);
  278. out:
  279. if (local_alloc_inode)
  280. iput(local_alloc_inode);
  281. if (alloc_copy)
  282. kfree(alloc_copy);
  283. mlog_exit_void();
  284. }
  285. /*
  286. * We want to free the bitmap bits outside of any recovery context as
  287. * we'll need a cluster lock to do so, but we must clear the local
  288. * alloc before giving up the recovered nodes journal. To solve this,
  289. * we kmalloc a copy of the local alloc before it's change for the
  290. * caller to process with ocfs2_complete_local_alloc_recovery
  291. */
  292. int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
  293. int slot_num,
  294. struct ocfs2_dinode **alloc_copy)
  295. {
  296. int status = 0;
  297. struct buffer_head *alloc_bh = NULL;
  298. struct inode *inode = NULL;
  299. struct ocfs2_dinode *alloc;
  300. mlog_entry("(slot_num = %d)\n", slot_num);
  301. *alloc_copy = NULL;
  302. inode = ocfs2_get_system_file_inode(osb,
  303. LOCAL_ALLOC_SYSTEM_INODE,
  304. slot_num);
  305. if (!inode) {
  306. status = -EINVAL;
  307. mlog_errno(status);
  308. goto bail;
  309. }
  310. mutex_lock(&inode->i_mutex);
  311. status = ocfs2_read_inode_block_full(inode, &alloc_bh,
  312. OCFS2_BH_IGNORE_CACHE);
  313. if (status < 0) {
  314. mlog_errno(status);
  315. goto bail;
  316. }
  317. *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
  318. if (!(*alloc_copy)) {
  319. status = -ENOMEM;
  320. goto bail;
  321. }
  322. memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
  323. alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
  324. ocfs2_clear_local_alloc(alloc);
  325. ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
  326. status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode));
  327. if (status < 0)
  328. mlog_errno(status);
  329. bail:
  330. if ((status < 0) && (*alloc_copy)) {
  331. kfree(*alloc_copy);
  332. *alloc_copy = NULL;
  333. }
  334. brelse(alloc_bh);
  335. if (inode) {
  336. mutex_unlock(&inode->i_mutex);
  337. iput(inode);
  338. }
  339. mlog_exit(status);
  340. return status;
  341. }
  342. /*
  343. * Step 2: By now, we've completed the journal recovery, we've stamped
  344. * a clean local alloc on disk and dropped the node out of the
  345. * recovery map. Dlm locks will no longer stall, so lets clear out the
  346. * main bitmap.
  347. */
  348. int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
  349. struct ocfs2_dinode *alloc)
  350. {
  351. int status;
  352. handle_t *handle;
  353. struct buffer_head *main_bm_bh = NULL;
  354. struct inode *main_bm_inode;
  355. mlog_entry_void();
  356. main_bm_inode = ocfs2_get_system_file_inode(osb,
  357. GLOBAL_BITMAP_SYSTEM_INODE,
  358. OCFS2_INVALID_SLOT);
  359. if (!main_bm_inode) {
  360. status = -EINVAL;
  361. mlog_errno(status);
  362. goto out;
  363. }
  364. mutex_lock(&main_bm_inode->i_mutex);
  365. status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
  366. if (status < 0) {
  367. mlog_errno(status);
  368. goto out_mutex;
  369. }
  370. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  371. if (IS_ERR(handle)) {
  372. status = PTR_ERR(handle);
  373. handle = NULL;
  374. mlog_errno(status);
  375. goto out_unlock;
  376. }
  377. /* we want the bitmap change to be recorded on disk asap */
  378. handle->h_sync = 1;
  379. status = ocfs2_sync_local_to_main(osb, handle, alloc,
  380. main_bm_inode, main_bm_bh);
  381. if (status < 0)
  382. mlog_errno(status);
  383. ocfs2_commit_trans(osb, handle);
  384. out_unlock:
  385. ocfs2_inode_unlock(main_bm_inode, 1);
  386. out_mutex:
  387. mutex_unlock(&main_bm_inode->i_mutex);
  388. brelse(main_bm_bh);
  389. iput(main_bm_inode);
  390. out:
  391. if (!status)
  392. ocfs2_init_inode_steal_slot(osb);
  393. mlog_exit(status);
  394. return status;
  395. }
  396. /* Check to see if the local alloc window is within ac->ac_max_block */
  397. static int ocfs2_local_alloc_in_range(struct inode *inode,
  398. struct ocfs2_alloc_context *ac,
  399. u32 bits_wanted)
  400. {
  401. struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  402. struct ocfs2_dinode *alloc;
  403. struct ocfs2_local_alloc *la;
  404. int start;
  405. u64 block_off;
  406. if (!ac->ac_max_block)
  407. return 1;
  408. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  409. la = OCFS2_LOCAL_ALLOC(alloc);
  410. start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
  411. if (start == -1) {
  412. mlog_errno(-ENOSPC);
  413. return 0;
  414. }
  415. /*
  416. * Converting (bm_off + start + bits_wanted) to blocks gives us
  417. * the blkno just past our actual allocation. This is perfect
  418. * to compare with ac_max_block.
  419. */
  420. block_off = ocfs2_clusters_to_blocks(inode->i_sb,
  421. le32_to_cpu(la->la_bm_off) +
  422. start + bits_wanted);
  423. mlog(0, "Checking %llu against %llu\n",
  424. (unsigned long long)block_off,
  425. (unsigned long long)ac->ac_max_block);
  426. if (block_off > ac->ac_max_block)
  427. return 0;
  428. return 1;
  429. }
  430. /*
  431. * make sure we've got at least bits_wanted contiguous bits in the
  432. * local alloc. You lose them when you drop i_mutex.
  433. *
  434. * We will add ourselves to the transaction passed in, but may start
  435. * our own in order to shift windows.
  436. */
  437. int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
  438. u32 bits_wanted,
  439. struct ocfs2_alloc_context *ac)
  440. {
  441. int status;
  442. struct ocfs2_dinode *alloc;
  443. struct inode *local_alloc_inode;
  444. unsigned int free_bits;
  445. mlog_entry_void();
  446. BUG_ON(!ac);
  447. local_alloc_inode =
  448. ocfs2_get_system_file_inode(osb,
  449. LOCAL_ALLOC_SYSTEM_INODE,
  450. osb->slot_num);
  451. if (!local_alloc_inode) {
  452. status = -ENOENT;
  453. mlog_errno(status);
  454. goto bail;
  455. }
  456. mutex_lock(&local_alloc_inode->i_mutex);
  457. /*
  458. * We must double check state and allocator bits because
  459. * another process may have changed them while holding i_mutex.
  460. */
  461. spin_lock(&osb->osb_lock);
  462. if (!ocfs2_la_state_enabled(osb) ||
  463. (bits_wanted > osb->local_alloc_bits)) {
  464. spin_unlock(&osb->osb_lock);
  465. status = -ENOSPC;
  466. goto bail;
  467. }
  468. spin_unlock(&osb->osb_lock);
  469. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  470. #ifdef CONFIG_OCFS2_DEBUG_FS
  471. if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
  472. ocfs2_local_alloc_count_bits(alloc)) {
  473. ocfs2_error(osb->sb, "local alloc inode %llu says it has "
  474. "%u free bits, but a count shows %u",
  475. (unsigned long long)le64_to_cpu(alloc->i_blkno),
  476. le32_to_cpu(alloc->id1.bitmap1.i_used),
  477. ocfs2_local_alloc_count_bits(alloc));
  478. status = -EIO;
  479. goto bail;
  480. }
  481. #endif
  482. free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
  483. le32_to_cpu(alloc->id1.bitmap1.i_used);
  484. if (bits_wanted > free_bits) {
  485. /* uhoh, window change time. */
  486. status =
  487. ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
  488. if (status < 0) {
  489. if (status != -ENOSPC)
  490. mlog_errno(status);
  491. goto bail;
  492. }
  493. /*
  494. * Under certain conditions, the window slide code
  495. * might have reduced the number of bits available or
  496. * disabled the the local alloc entirely. Re-check
  497. * here and return -ENOSPC if necessary.
  498. */
  499. status = -ENOSPC;
  500. if (!ocfs2_la_state_enabled(osb))
  501. goto bail;
  502. free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
  503. le32_to_cpu(alloc->id1.bitmap1.i_used);
  504. if (bits_wanted > free_bits)
  505. goto bail;
  506. }
  507. if (ac->ac_max_block)
  508. mlog(0, "Calling in_range for max block %llu\n",
  509. (unsigned long long)ac->ac_max_block);
  510. if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
  511. bits_wanted)) {
  512. /*
  513. * The window is outside ac->ac_max_block.
  514. * This errno tells the caller to keep localalloc enabled
  515. * but to get the allocation from the main bitmap.
  516. */
  517. status = -EFBIG;
  518. goto bail;
  519. }
  520. ac->ac_inode = local_alloc_inode;
  521. /* We should never use localalloc from another slot */
  522. ac->ac_alloc_slot = osb->slot_num;
  523. ac->ac_which = OCFS2_AC_USE_LOCAL;
  524. get_bh(osb->local_alloc_bh);
  525. ac->ac_bh = osb->local_alloc_bh;
  526. status = 0;
  527. bail:
  528. if (status < 0 && local_alloc_inode) {
  529. mutex_unlock(&local_alloc_inode->i_mutex);
  530. iput(local_alloc_inode);
  531. }
  532. mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
  533. status);
  534. mlog_exit(status);
  535. return status;
  536. }
  537. int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
  538. handle_t *handle,
  539. struct ocfs2_alloc_context *ac,
  540. u32 bits_wanted,
  541. u32 *bit_off,
  542. u32 *num_bits)
  543. {
  544. int status, start;
  545. struct inode *local_alloc_inode;
  546. void *bitmap;
  547. struct ocfs2_dinode *alloc;
  548. struct ocfs2_local_alloc *la;
  549. mlog_entry_void();
  550. BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
  551. local_alloc_inode = ac->ac_inode;
  552. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  553. la = OCFS2_LOCAL_ALLOC(alloc);
  554. start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
  555. if (start == -1) {
  556. /* TODO: Shouldn't we just BUG here? */
  557. status = -ENOSPC;
  558. mlog_errno(status);
  559. goto bail;
  560. }
  561. bitmap = la->la_bitmap;
  562. *bit_off = le32_to_cpu(la->la_bm_off) + start;
  563. /* local alloc is always contiguous by nature -- we never
  564. * delete bits from it! */
  565. *num_bits = bits_wanted;
  566. status = ocfs2_journal_access_di(handle,
  567. INODE_CACHE(local_alloc_inode),
  568. osb->local_alloc_bh,
  569. OCFS2_JOURNAL_ACCESS_WRITE);
  570. if (status < 0) {
  571. mlog_errno(status);
  572. goto bail;
  573. }
  574. while(bits_wanted--)
  575. ocfs2_set_bit(start++, bitmap);
  576. le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
  577. status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
  578. if (status < 0) {
  579. mlog_errno(status);
  580. goto bail;
  581. }
  582. status = 0;
  583. bail:
  584. mlog_exit(status);
  585. return status;
  586. }
  587. static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
  588. {
  589. int i;
  590. u8 *buffer;
  591. u32 count = 0;
  592. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  593. mlog_entry_void();
  594. buffer = la->la_bitmap;
  595. for (i = 0; i < le16_to_cpu(la->la_size); i++)
  596. count += hweight8(buffer[i]);
  597. mlog_exit(count);
  598. return count;
  599. }
  600. static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
  601. struct ocfs2_dinode *alloc,
  602. u32 numbits)
  603. {
  604. int numfound, bitoff, left, startoff, lastzero;
  605. void *bitmap = NULL;
  606. mlog_entry("(numbits wanted = %u)\n", numbits);
  607. if (!alloc->id1.bitmap1.i_total) {
  608. mlog(0, "No bits in my window!\n");
  609. bitoff = -1;
  610. goto bail;
  611. }
  612. bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
  613. numfound = bitoff = startoff = 0;
  614. lastzero = -1;
  615. left = le32_to_cpu(alloc->id1.bitmap1.i_total);
  616. while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
  617. if (bitoff == left) {
  618. /* mlog(0, "bitoff (%d) == left", bitoff); */
  619. break;
  620. }
  621. /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
  622. "numfound = %d\n", bitoff, startoff, numfound);*/
  623. /* Ok, we found a zero bit... is it contig. or do we
  624. * start over?*/
  625. if (bitoff == startoff) {
  626. /* we found a zero */
  627. numfound++;
  628. startoff++;
  629. } else {
  630. /* got a zero after some ones */
  631. numfound = 1;
  632. startoff = bitoff+1;
  633. }
  634. /* we got everything we needed */
  635. if (numfound == numbits) {
  636. /* mlog(0, "Found it all!\n"); */
  637. break;
  638. }
  639. }
  640. mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
  641. numfound);
  642. if (numfound == numbits)
  643. bitoff = startoff - numfound;
  644. else
  645. bitoff = -1;
  646. bail:
  647. mlog_exit(bitoff);
  648. return bitoff;
  649. }
  650. static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
  651. {
  652. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  653. int i;
  654. mlog_entry_void();
  655. alloc->id1.bitmap1.i_total = 0;
  656. alloc->id1.bitmap1.i_used = 0;
  657. la->la_bm_off = 0;
  658. for(i = 0; i < le16_to_cpu(la->la_size); i++)
  659. la->la_bitmap[i] = 0;
  660. mlog_exit_void();
  661. }
  662. #if 0
  663. /* turn this on and uncomment below to aid debugging window shifts. */
  664. static void ocfs2_verify_zero_bits(unsigned long *bitmap,
  665. unsigned int start,
  666. unsigned int count)
  667. {
  668. unsigned int tmp = count;
  669. while(tmp--) {
  670. if (ocfs2_test_bit(start + tmp, bitmap)) {
  671. printk("ocfs2_verify_zero_bits: start = %u, count = "
  672. "%u\n", start, count);
  673. printk("ocfs2_verify_zero_bits: bit %u is set!",
  674. start + tmp);
  675. BUG();
  676. }
  677. }
  678. }
  679. #endif
  680. /*
  681. * sync the local alloc to main bitmap.
  682. *
  683. * assumes you've already locked the main bitmap -- the bitmap inode
  684. * passed is used for caching.
  685. */
  686. static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
  687. handle_t *handle,
  688. struct ocfs2_dinode *alloc,
  689. struct inode *main_bm_inode,
  690. struct buffer_head *main_bm_bh)
  691. {
  692. int status = 0;
  693. int bit_off, left, count, start;
  694. u64 la_start_blk;
  695. u64 blkno;
  696. void *bitmap;
  697. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  698. mlog_entry("total = %u, used = %u\n",
  699. le32_to_cpu(alloc->id1.bitmap1.i_total),
  700. le32_to_cpu(alloc->id1.bitmap1.i_used));
  701. if (!alloc->id1.bitmap1.i_total) {
  702. mlog(0, "nothing to sync!\n");
  703. goto bail;
  704. }
  705. if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
  706. le32_to_cpu(alloc->id1.bitmap1.i_total)) {
  707. mlog(0, "all bits were taken!\n");
  708. goto bail;
  709. }
  710. la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
  711. le32_to_cpu(la->la_bm_off));
  712. bitmap = la->la_bitmap;
  713. start = count = bit_off = 0;
  714. left = le32_to_cpu(alloc->id1.bitmap1.i_total);
  715. while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
  716. != -1) {
  717. if ((bit_off < left) && (bit_off == start)) {
  718. count++;
  719. start++;
  720. continue;
  721. }
  722. if (count) {
  723. blkno = la_start_blk +
  724. ocfs2_clusters_to_blocks(osb->sb,
  725. start - count);
  726. mlog(0, "freeing %u bits starting at local alloc bit "
  727. "%u (la_start_blk = %llu, blkno = %llu)\n",
  728. count, start - count,
  729. (unsigned long long)la_start_blk,
  730. (unsigned long long)blkno);
  731. status = ocfs2_free_clusters(handle, main_bm_inode,
  732. main_bm_bh, blkno, count);
  733. if (status < 0) {
  734. mlog_errno(status);
  735. goto bail;
  736. }
  737. }
  738. if (bit_off >= left)
  739. break;
  740. count = 1;
  741. start = bit_off + 1;
  742. }
  743. bail:
  744. mlog_exit(status);
  745. return status;
  746. }
  747. enum ocfs2_la_event {
  748. OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */
  749. OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has
  750. * enough bits theoretically
  751. * free, but a contiguous
  752. * allocation could not be
  753. * found. */
  754. OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have
  755. * enough bits free to satisfy
  756. * our request. */
  757. };
  758. #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
  759. /*
  760. * Given an event, calculate the size of our next local alloc window.
  761. *
  762. * This should always be called under i_mutex of the local alloc inode
  763. * so that local alloc disabling doesn't race with processes trying to
  764. * use the allocator.
  765. *
  766. * Returns the state which the local alloc was left in. This value can
  767. * be ignored by some paths.
  768. */
  769. static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
  770. enum ocfs2_la_event event)
  771. {
  772. unsigned int bits;
  773. int state;
  774. spin_lock(&osb->osb_lock);
  775. if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
  776. WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
  777. goto out_unlock;
  778. }
  779. /*
  780. * ENOSPC and fragmentation are treated similarly for now.
  781. */
  782. if (event == OCFS2_LA_EVENT_ENOSPC ||
  783. event == OCFS2_LA_EVENT_FRAGMENTED) {
  784. /*
  785. * We ran out of contiguous space in the primary
  786. * bitmap. Drastically reduce the number of bits used
  787. * by local alloc until we have to disable it.
  788. */
  789. bits = osb->local_alloc_bits >> 1;
  790. if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
  791. /*
  792. * By setting state to THROTTLED, we'll keep
  793. * the number of local alloc bits used down
  794. * until an event occurs which would give us
  795. * reason to assume the bitmap situation might
  796. * have changed.
  797. */
  798. osb->local_alloc_state = OCFS2_LA_THROTTLED;
  799. osb->local_alloc_bits = bits;
  800. } else {
  801. osb->local_alloc_state = OCFS2_LA_DISABLED;
  802. }
  803. queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
  804. OCFS2_LA_ENABLE_INTERVAL);
  805. goto out_unlock;
  806. }
  807. /*
  808. * Don't increase the size of the local alloc window until we
  809. * know we might be able to fulfill the request. Otherwise, we
  810. * risk bouncing around the global bitmap during periods of
  811. * low space.
  812. */
  813. if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
  814. osb->local_alloc_bits = osb->local_alloc_default_bits;
  815. out_unlock:
  816. state = osb->local_alloc_state;
  817. spin_unlock(&osb->osb_lock);
  818. return state;
  819. }
  820. static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
  821. struct ocfs2_alloc_context **ac,
  822. struct inode **bitmap_inode,
  823. struct buffer_head **bitmap_bh)
  824. {
  825. int status;
  826. *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
  827. if (!(*ac)) {
  828. status = -ENOMEM;
  829. mlog_errno(status);
  830. goto bail;
  831. }
  832. retry_enospc:
  833. (*ac)->ac_bits_wanted = osb->local_alloc_bits;
  834. status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
  835. if (status == -ENOSPC) {
  836. if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
  837. OCFS2_LA_DISABLED)
  838. goto bail;
  839. ocfs2_free_ac_resource(*ac);
  840. memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
  841. goto retry_enospc;
  842. }
  843. if (status < 0) {
  844. mlog_errno(status);
  845. goto bail;
  846. }
  847. *bitmap_inode = (*ac)->ac_inode;
  848. igrab(*bitmap_inode);
  849. *bitmap_bh = (*ac)->ac_bh;
  850. get_bh(*bitmap_bh);
  851. status = 0;
  852. bail:
  853. if ((status < 0) && *ac) {
  854. ocfs2_free_alloc_context(*ac);
  855. *ac = NULL;
  856. }
  857. mlog_exit(status);
  858. return status;
  859. }
  860. /*
  861. * pass it the bitmap lock in lock_bh if you have it.
  862. */
  863. static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
  864. handle_t *handle,
  865. struct ocfs2_alloc_context *ac)
  866. {
  867. int status = 0;
  868. u32 cluster_off, cluster_count;
  869. struct ocfs2_dinode *alloc = NULL;
  870. struct ocfs2_local_alloc *la;
  871. mlog_entry_void();
  872. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  873. la = OCFS2_LOCAL_ALLOC(alloc);
  874. if (alloc->id1.bitmap1.i_total)
  875. mlog(0, "asking me to alloc a new window over a non-empty "
  876. "one\n");
  877. mlog(0, "Allocating %u clusters for a new window.\n",
  878. osb->local_alloc_bits);
  879. /* Instruct the allocation code to try the most recently used
  880. * cluster group. We'll re-record the group used this pass
  881. * below. */
  882. ac->ac_last_group = osb->la_last_gd;
  883. /* we used the generic suballoc reserve function, but we set
  884. * everything up nicely, so there's no reason why we can't use
  885. * the more specific cluster api to claim bits. */
  886. status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
  887. &cluster_off, &cluster_count);
  888. if (status == -ENOSPC) {
  889. retry_enospc:
  890. /*
  891. * Note: We could also try syncing the journal here to
  892. * allow use of any free bits which the current
  893. * transaction can't give us access to. --Mark
  894. */
  895. if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
  896. OCFS2_LA_DISABLED)
  897. goto bail;
  898. status = ocfs2_claim_clusters(osb, handle, ac,
  899. osb->local_alloc_bits,
  900. &cluster_off,
  901. &cluster_count);
  902. if (status == -ENOSPC)
  903. goto retry_enospc;
  904. /*
  905. * We only shrunk the *minimum* number of in our
  906. * request - it's entirely possible that the allocator
  907. * might give us more than we asked for.
  908. */
  909. if (status == 0) {
  910. spin_lock(&osb->osb_lock);
  911. osb->local_alloc_bits = cluster_count;
  912. spin_unlock(&osb->osb_lock);
  913. }
  914. }
  915. if (status < 0) {
  916. if (status != -ENOSPC)
  917. mlog_errno(status);
  918. goto bail;
  919. }
  920. osb->la_last_gd = ac->ac_last_group;
  921. la->la_bm_off = cpu_to_le32(cluster_off);
  922. alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
  923. /* just in case... In the future when we find space ourselves,
  924. * we don't have to get all contiguous -- but we'll have to
  925. * set all previously used bits in bitmap and update
  926. * la_bits_set before setting the bits in the main bitmap. */
  927. alloc->id1.bitmap1.i_used = 0;
  928. memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
  929. le16_to_cpu(la->la_size));
  930. mlog(0, "New window allocated:\n");
  931. mlog(0, "window la_bm_off = %u\n",
  932. OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
  933. mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
  934. bail:
  935. mlog_exit(status);
  936. return status;
  937. }
  938. /* Note that we do *NOT* lock the local alloc inode here as
  939. * it's been locked already for us. */
  940. static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
  941. struct inode *local_alloc_inode)
  942. {
  943. int status = 0;
  944. struct buffer_head *main_bm_bh = NULL;
  945. struct inode *main_bm_inode = NULL;
  946. handle_t *handle = NULL;
  947. struct ocfs2_dinode *alloc;
  948. struct ocfs2_dinode *alloc_copy = NULL;
  949. struct ocfs2_alloc_context *ac = NULL;
  950. mlog_entry_void();
  951. ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
  952. /* This will lock the main bitmap for us. */
  953. status = ocfs2_local_alloc_reserve_for_window(osb,
  954. &ac,
  955. &main_bm_inode,
  956. &main_bm_bh);
  957. if (status < 0) {
  958. if (status != -ENOSPC)
  959. mlog_errno(status);
  960. goto bail;
  961. }
  962. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  963. if (IS_ERR(handle)) {
  964. status = PTR_ERR(handle);
  965. handle = NULL;
  966. mlog_errno(status);
  967. goto bail;
  968. }
  969. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  970. /* We want to clear the local alloc before doing anything
  971. * else, so that if we error later during this operation,
  972. * local alloc shutdown won't try to double free main bitmap
  973. * bits. Make a copy so the sync function knows which bits to
  974. * free. */
  975. alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
  976. if (!alloc_copy) {
  977. status = -ENOMEM;
  978. mlog_errno(status);
  979. goto bail;
  980. }
  981. memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
  982. status = ocfs2_journal_access_di(handle,
  983. INODE_CACHE(local_alloc_inode),
  984. osb->local_alloc_bh,
  985. OCFS2_JOURNAL_ACCESS_WRITE);
  986. if (status < 0) {
  987. mlog_errno(status);
  988. goto bail;
  989. }
  990. ocfs2_clear_local_alloc(alloc);
  991. status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
  992. if (status < 0) {
  993. mlog_errno(status);
  994. goto bail;
  995. }
  996. status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
  997. main_bm_inode, main_bm_bh);
  998. if (status < 0) {
  999. mlog_errno(status);
  1000. goto bail;
  1001. }
  1002. status = ocfs2_local_alloc_new_window(osb, handle, ac);
  1003. if (status < 0) {
  1004. if (status != -ENOSPC)
  1005. mlog_errno(status);
  1006. goto bail;
  1007. }
  1008. atomic_inc(&osb->alloc_stats.moves);
  1009. status = 0;
  1010. bail:
  1011. if (handle)
  1012. ocfs2_commit_trans(osb, handle);
  1013. brelse(main_bm_bh);
  1014. if (main_bm_inode)
  1015. iput(main_bm_inode);
  1016. if (alloc_copy)
  1017. kfree(alloc_copy);
  1018. if (ac)
  1019. ocfs2_free_alloc_context(ac);
  1020. mlog_exit(status);
  1021. return status;
  1022. }