localalloc.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * localalloc.c
  5. *
  6. * Node local data allocation
  7. *
  8. * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2 of the License, or (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public
  21. * License along with this program; if not, write to the
  22. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23. * Boston, MA 021110-1307, USA.
  24. */
  25. #include <linux/fs.h>
  26. #include <linux/types.h>
  27. #include <linux/slab.h>
  28. #include <linux/highmem.h>
  29. #include <linux/bitops.h>
  30. #define MLOG_MASK_PREFIX ML_DISK_ALLOC
  31. #include <cluster/masklog.h>
  32. #include "ocfs2.h"
  33. #include "alloc.h"
  34. #include "blockcheck.h"
  35. #include "dlmglue.h"
  36. #include "inode.h"
  37. #include "journal.h"
  38. #include "localalloc.h"
  39. #include "suballoc.h"
  40. #include "super.h"
  41. #include "sysfile.h"
  42. #include "buffer_head_io.h"
  43. #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
  44. static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
  45. static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
  46. struct ocfs2_dinode *alloc,
  47. u32 numbits);
  48. static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
  49. static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
  50. handle_t *handle,
  51. struct ocfs2_dinode *alloc,
  52. struct inode *main_bm_inode,
  53. struct buffer_head *main_bm_bh);
  54. static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
  55. struct ocfs2_alloc_context **ac,
  56. struct inode **bitmap_inode,
  57. struct buffer_head **bitmap_bh);
  58. static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
  59. handle_t *handle,
  60. struct ocfs2_alloc_context *ac);
  61. static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
  62. struct inode *local_alloc_inode);
  63. static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
  64. {
  65. return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
  66. osb->local_alloc_state == OCFS2_LA_ENABLED);
  67. }
  68. void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
  69. unsigned int num_clusters)
  70. {
  71. spin_lock(&osb->osb_lock);
  72. if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
  73. osb->local_alloc_state == OCFS2_LA_THROTTLED)
  74. if (num_clusters >= osb->local_alloc_default_bits) {
  75. cancel_delayed_work(&osb->la_enable_wq);
  76. osb->local_alloc_state = OCFS2_LA_ENABLED;
  77. }
  78. spin_unlock(&osb->osb_lock);
  79. }
  80. void ocfs2_la_enable_worker(struct work_struct *work)
  81. {
  82. struct ocfs2_super *osb =
  83. container_of(work, struct ocfs2_super,
  84. la_enable_wq.work);
  85. spin_lock(&osb->osb_lock);
  86. osb->local_alloc_state = OCFS2_LA_ENABLED;
  87. spin_unlock(&osb->osb_lock);
  88. }
  89. /*
  90. * Tell us whether a given allocation should use the local alloc
  91. * file. Otherwise, it has to go to the main bitmap.
  92. *
  93. * This function does semi-dirty reads of local alloc size and state!
  94. * This is ok however, as the values are re-checked once under mutex.
  95. */
  96. int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
  97. {
  98. int ret = 0;
  99. int la_bits;
  100. spin_lock(&osb->osb_lock);
  101. la_bits = osb->local_alloc_bits;
  102. if (!ocfs2_la_state_enabled(osb))
  103. goto bail;
  104. /* la_bits should be at least twice the size (in clusters) of
  105. * a new block group. We want to be sure block group
  106. * allocations go through the local alloc, so allow an
  107. * allocation to take up to half the bitmap. */
  108. if (bits > (la_bits / 2))
  109. goto bail;
  110. ret = 1;
  111. bail:
  112. mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
  113. osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
  114. spin_unlock(&osb->osb_lock);
  115. return ret;
  116. }
  117. int ocfs2_load_local_alloc(struct ocfs2_super *osb)
  118. {
  119. int status = 0;
  120. struct ocfs2_dinode *alloc = NULL;
  121. struct buffer_head *alloc_bh = NULL;
  122. u32 num_used;
  123. struct inode *inode = NULL;
  124. struct ocfs2_local_alloc *la;
  125. mlog_entry_void();
  126. if (osb->local_alloc_bits == 0)
  127. goto bail;
  128. if (osb->local_alloc_bits >= osb->bitmap_cpg) {
  129. mlog(ML_NOTICE, "Requested local alloc window %d is larger "
  130. "than max possible %u. Using defaults.\n",
  131. osb->local_alloc_bits, (osb->bitmap_cpg - 1));
  132. osb->local_alloc_bits =
  133. ocfs2_megabytes_to_clusters(osb->sb,
  134. OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
  135. }
  136. /* read the alloc off disk */
  137. inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
  138. osb->slot_num);
  139. if (!inode) {
  140. status = -EINVAL;
  141. mlog_errno(status);
  142. goto bail;
  143. }
  144. status = ocfs2_read_inode_block_full(inode, &alloc_bh,
  145. OCFS2_BH_IGNORE_CACHE);
  146. if (status < 0) {
  147. mlog_errno(status);
  148. goto bail;
  149. }
  150. alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
  151. la = OCFS2_LOCAL_ALLOC(alloc);
  152. if (!(le32_to_cpu(alloc->i_flags) &
  153. (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
  154. mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
  155. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  156. status = -EINVAL;
  157. goto bail;
  158. }
  159. if ((la->la_size == 0) ||
  160. (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
  161. mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
  162. le16_to_cpu(la->la_size));
  163. status = -EINVAL;
  164. goto bail;
  165. }
  166. /* do a little verification. */
  167. num_used = ocfs2_local_alloc_count_bits(alloc);
  168. /* hopefully the local alloc has always been recovered before
  169. * we load it. */
  170. if (num_used
  171. || alloc->id1.bitmap1.i_used
  172. || alloc->id1.bitmap1.i_total
  173. || la->la_bm_off)
  174. mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
  175. "found = %u, set = %u, taken = %u, off = %u\n",
  176. num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
  177. le32_to_cpu(alloc->id1.bitmap1.i_total),
  178. OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
  179. osb->local_alloc_bh = alloc_bh;
  180. osb->local_alloc_state = OCFS2_LA_ENABLED;
  181. bail:
  182. if (status < 0)
  183. brelse(alloc_bh);
  184. if (inode)
  185. iput(inode);
  186. mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
  187. mlog_exit(status);
  188. return status;
  189. }
  190. /*
  191. * return any unused bits to the bitmap and write out a clean
  192. * local_alloc.
  193. *
  194. * local_alloc_bh is optional. If not passed, we will simply use the
  195. * one off osb. If you do pass it however, be warned that it *will* be
  196. * returned brelse'd and NULL'd out.*/
  197. void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
  198. {
  199. int status;
  200. handle_t *handle;
  201. struct inode *local_alloc_inode = NULL;
  202. struct buffer_head *bh = NULL;
  203. struct buffer_head *main_bm_bh = NULL;
  204. struct inode *main_bm_inode = NULL;
  205. struct ocfs2_dinode *alloc_copy = NULL;
  206. struct ocfs2_dinode *alloc = NULL;
  207. mlog_entry_void();
  208. cancel_delayed_work(&osb->la_enable_wq);
  209. flush_workqueue(ocfs2_wq);
  210. if (osb->local_alloc_state == OCFS2_LA_UNUSED)
  211. goto out;
  212. local_alloc_inode =
  213. ocfs2_get_system_file_inode(osb,
  214. LOCAL_ALLOC_SYSTEM_INODE,
  215. osb->slot_num);
  216. if (!local_alloc_inode) {
  217. status = -ENOENT;
  218. mlog_errno(status);
  219. goto out;
  220. }
  221. osb->local_alloc_state = OCFS2_LA_DISABLED;
  222. main_bm_inode = ocfs2_get_system_file_inode(osb,
  223. GLOBAL_BITMAP_SYSTEM_INODE,
  224. OCFS2_INVALID_SLOT);
  225. if (!main_bm_inode) {
  226. status = -EINVAL;
  227. mlog_errno(status);
  228. goto out;
  229. }
  230. mutex_lock(&main_bm_inode->i_mutex);
  231. status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
  232. if (status < 0) {
  233. mlog_errno(status);
  234. goto out_mutex;
  235. }
  236. /* WINDOW_MOVE_CREDITS is a bit heavy... */
  237. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  238. if (IS_ERR(handle)) {
  239. mlog_errno(PTR_ERR(handle));
  240. handle = NULL;
  241. goto out_unlock;
  242. }
  243. bh = osb->local_alloc_bh;
  244. alloc = (struct ocfs2_dinode *) bh->b_data;
  245. alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
  246. if (!alloc_copy) {
  247. status = -ENOMEM;
  248. goto out_commit;
  249. }
  250. memcpy(alloc_copy, alloc, bh->b_size);
  251. status = ocfs2_journal_access_di(handle, local_alloc_inode, bh,
  252. OCFS2_JOURNAL_ACCESS_WRITE);
  253. if (status < 0) {
  254. mlog_errno(status);
  255. goto out_commit;
  256. }
  257. ocfs2_clear_local_alloc(alloc);
  258. status = ocfs2_journal_dirty(handle, bh);
  259. if (status < 0) {
  260. mlog_errno(status);
  261. goto out_commit;
  262. }
  263. brelse(bh);
  264. osb->local_alloc_bh = NULL;
  265. osb->local_alloc_state = OCFS2_LA_UNUSED;
  266. status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
  267. main_bm_inode, main_bm_bh);
  268. if (status < 0)
  269. mlog_errno(status);
  270. out_commit:
  271. ocfs2_commit_trans(osb, handle);
  272. out_unlock:
  273. brelse(main_bm_bh);
  274. ocfs2_inode_unlock(main_bm_inode, 1);
  275. out_mutex:
  276. mutex_unlock(&main_bm_inode->i_mutex);
  277. iput(main_bm_inode);
  278. out:
  279. if (local_alloc_inode)
  280. iput(local_alloc_inode);
  281. if (alloc_copy)
  282. kfree(alloc_copy);
  283. mlog_exit_void();
  284. }
  285. /*
  286. * We want to free the bitmap bits outside of any recovery context as
  287. * we'll need a cluster lock to do so, but we must clear the local
  288. * alloc before giving up the recovered nodes journal. To solve this,
  289. * we kmalloc a copy of the local alloc before it's change for the
  290. * caller to process with ocfs2_complete_local_alloc_recovery
  291. */
  292. int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
  293. int slot_num,
  294. struct ocfs2_dinode **alloc_copy)
  295. {
  296. int status = 0;
  297. struct buffer_head *alloc_bh = NULL;
  298. struct inode *inode = NULL;
  299. struct ocfs2_dinode *alloc;
  300. mlog_entry("(slot_num = %d)\n", slot_num);
  301. *alloc_copy = NULL;
  302. inode = ocfs2_get_system_file_inode(osb,
  303. LOCAL_ALLOC_SYSTEM_INODE,
  304. slot_num);
  305. if (!inode) {
  306. status = -EINVAL;
  307. mlog_errno(status);
  308. goto bail;
  309. }
  310. mutex_lock(&inode->i_mutex);
  311. status = ocfs2_read_inode_block_full(inode, &alloc_bh,
  312. OCFS2_BH_IGNORE_CACHE);
  313. if (status < 0) {
  314. mlog_errno(status);
  315. goto bail;
  316. }
  317. *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
  318. if (!(*alloc_copy)) {
  319. status = -ENOMEM;
  320. goto bail;
  321. }
  322. memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
  323. alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
  324. ocfs2_clear_local_alloc(alloc);
  325. ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
  326. status = ocfs2_write_block(osb, alloc_bh, inode);
  327. if (status < 0)
  328. mlog_errno(status);
  329. bail:
  330. if ((status < 0) && (*alloc_copy)) {
  331. kfree(*alloc_copy);
  332. *alloc_copy = NULL;
  333. }
  334. brelse(alloc_bh);
  335. if (inode) {
  336. mutex_unlock(&inode->i_mutex);
  337. iput(inode);
  338. }
  339. mlog_exit(status);
  340. return status;
  341. }
  342. /*
  343. * Step 2: By now, we've completed the journal recovery, we've stamped
  344. * a clean local alloc on disk and dropped the node out of the
  345. * recovery map. Dlm locks will no longer stall, so lets clear out the
  346. * main bitmap.
  347. */
  348. int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
  349. struct ocfs2_dinode *alloc)
  350. {
  351. int status;
  352. handle_t *handle;
  353. struct buffer_head *main_bm_bh = NULL;
  354. struct inode *main_bm_inode;
  355. mlog_entry_void();
  356. main_bm_inode = ocfs2_get_system_file_inode(osb,
  357. GLOBAL_BITMAP_SYSTEM_INODE,
  358. OCFS2_INVALID_SLOT);
  359. if (!main_bm_inode) {
  360. status = -EINVAL;
  361. mlog_errno(status);
  362. goto out;
  363. }
  364. mutex_lock(&main_bm_inode->i_mutex);
  365. status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
  366. if (status < 0) {
  367. mlog_errno(status);
  368. goto out_mutex;
  369. }
  370. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  371. if (IS_ERR(handle)) {
  372. status = PTR_ERR(handle);
  373. handle = NULL;
  374. mlog_errno(status);
  375. goto out_unlock;
  376. }
  377. /* we want the bitmap change to be recorded on disk asap */
  378. handle->h_sync = 1;
  379. status = ocfs2_sync_local_to_main(osb, handle, alloc,
  380. main_bm_inode, main_bm_bh);
  381. if (status < 0)
  382. mlog_errno(status);
  383. ocfs2_commit_trans(osb, handle);
  384. out_unlock:
  385. ocfs2_inode_unlock(main_bm_inode, 1);
  386. out_mutex:
  387. mutex_unlock(&main_bm_inode->i_mutex);
  388. brelse(main_bm_bh);
  389. iput(main_bm_inode);
  390. out:
  391. if (!status)
  392. ocfs2_init_inode_steal_slot(osb);
  393. mlog_exit(status);
  394. return status;
  395. }
  396. /* Check to see if the local alloc window is within ac->ac_max_block */
  397. static int ocfs2_local_alloc_in_range(struct inode *inode,
  398. struct ocfs2_alloc_context *ac,
  399. u32 bits_wanted)
  400. {
  401. struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  402. struct ocfs2_dinode *alloc;
  403. struct ocfs2_local_alloc *la;
  404. int start;
  405. u64 block_off;
  406. if (!ac->ac_max_block)
  407. return 1;
  408. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  409. la = OCFS2_LOCAL_ALLOC(alloc);
  410. start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
  411. if (start == -1) {
  412. mlog_errno(-ENOSPC);
  413. return 0;
  414. }
  415. /*
  416. * Converting (bm_off + start + bits_wanted) to blocks gives us
  417. * the blkno just past our actual allocation. This is perfect
  418. * to compare with ac_max_block.
  419. */
  420. block_off = ocfs2_clusters_to_blocks(inode->i_sb,
  421. le32_to_cpu(la->la_bm_off) +
  422. start + bits_wanted);
  423. mlog(0, "Checking %llu against %llu\n",
  424. (unsigned long long)block_off,
  425. (unsigned long long)ac->ac_max_block);
  426. if (block_off > ac->ac_max_block)
  427. return 0;
  428. return 1;
  429. }
  430. /*
  431. * make sure we've got at least bits_wanted contiguous bits in the
  432. * local alloc. You lose them when you drop i_mutex.
  433. *
  434. * We will add ourselves to the transaction passed in, but may start
  435. * our own in order to shift windows.
  436. */
  437. int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
  438. u32 bits_wanted,
  439. struct ocfs2_alloc_context *ac)
  440. {
  441. int status;
  442. struct ocfs2_dinode *alloc;
  443. struct inode *local_alloc_inode;
  444. unsigned int free_bits;
  445. mlog_entry_void();
  446. BUG_ON(!ac);
  447. local_alloc_inode =
  448. ocfs2_get_system_file_inode(osb,
  449. LOCAL_ALLOC_SYSTEM_INODE,
  450. osb->slot_num);
  451. if (!local_alloc_inode) {
  452. status = -ENOENT;
  453. mlog_errno(status);
  454. goto bail;
  455. }
  456. mutex_lock(&local_alloc_inode->i_mutex);
  457. /*
  458. * We must double check state and allocator bits because
  459. * another process may have changed them while holding i_mutex.
  460. */
  461. spin_lock(&osb->osb_lock);
  462. if (!ocfs2_la_state_enabled(osb) ||
  463. (bits_wanted > osb->local_alloc_bits)) {
  464. spin_unlock(&osb->osb_lock);
  465. status = -ENOSPC;
  466. goto bail;
  467. }
  468. spin_unlock(&osb->osb_lock);
  469. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  470. #ifdef CONFIG_OCFS2_DEBUG_FS
  471. if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
  472. ocfs2_local_alloc_count_bits(alloc)) {
  473. ocfs2_error(osb->sb, "local alloc inode %llu says it has "
  474. "%u free bits, but a count shows %u",
  475. (unsigned long long)le64_to_cpu(alloc->i_blkno),
  476. le32_to_cpu(alloc->id1.bitmap1.i_used),
  477. ocfs2_local_alloc_count_bits(alloc));
  478. status = -EIO;
  479. goto bail;
  480. }
  481. #endif
  482. free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
  483. le32_to_cpu(alloc->id1.bitmap1.i_used);
  484. if (bits_wanted > free_bits) {
  485. /* uhoh, window change time. */
  486. status =
  487. ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
  488. if (status < 0) {
  489. if (status != -ENOSPC)
  490. mlog_errno(status);
  491. goto bail;
  492. }
  493. /*
  494. * Under certain conditions, the window slide code
  495. * might have reduced the number of bits available or
  496. * disabled the the local alloc entirely. Re-check
  497. * here and return -ENOSPC if necessary.
  498. */
  499. status = -ENOSPC;
  500. if (!ocfs2_la_state_enabled(osb))
  501. goto bail;
  502. free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
  503. le32_to_cpu(alloc->id1.bitmap1.i_used);
  504. if (bits_wanted > free_bits)
  505. goto bail;
  506. }
  507. if (ac->ac_max_block)
  508. mlog(0, "Calling in_range for max block %llu\n",
  509. (unsigned long long)ac->ac_max_block);
  510. if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
  511. bits_wanted)) {
  512. /*
  513. * The window is outside ac->ac_max_block.
  514. * This errno tells the caller to keep localalloc enabled
  515. * but to get the allocation from the main bitmap.
  516. */
  517. status = -EFBIG;
  518. goto bail;
  519. }
  520. ac->ac_inode = local_alloc_inode;
  521. /* We should never use localalloc from another slot */
  522. ac->ac_alloc_slot = osb->slot_num;
  523. ac->ac_which = OCFS2_AC_USE_LOCAL;
  524. get_bh(osb->local_alloc_bh);
  525. ac->ac_bh = osb->local_alloc_bh;
  526. status = 0;
  527. bail:
  528. if (status < 0 && local_alloc_inode) {
  529. mutex_unlock(&local_alloc_inode->i_mutex);
  530. iput(local_alloc_inode);
  531. }
  532. mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
  533. status);
  534. mlog_exit(status);
  535. return status;
  536. }
  537. int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
  538. handle_t *handle,
  539. struct ocfs2_alloc_context *ac,
  540. u32 bits_wanted,
  541. u32 *bit_off,
  542. u32 *num_bits)
  543. {
  544. int status, start;
  545. struct inode *local_alloc_inode;
  546. void *bitmap;
  547. struct ocfs2_dinode *alloc;
  548. struct ocfs2_local_alloc *la;
  549. mlog_entry_void();
  550. BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
  551. local_alloc_inode = ac->ac_inode;
  552. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  553. la = OCFS2_LOCAL_ALLOC(alloc);
  554. start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
  555. if (start == -1) {
  556. /* TODO: Shouldn't we just BUG here? */
  557. status = -ENOSPC;
  558. mlog_errno(status);
  559. goto bail;
  560. }
  561. bitmap = la->la_bitmap;
  562. *bit_off = le32_to_cpu(la->la_bm_off) + start;
  563. /* local alloc is always contiguous by nature -- we never
  564. * delete bits from it! */
  565. *num_bits = bits_wanted;
  566. status = ocfs2_journal_access_di(handle, local_alloc_inode,
  567. osb->local_alloc_bh,
  568. OCFS2_JOURNAL_ACCESS_WRITE);
  569. if (status < 0) {
  570. mlog_errno(status);
  571. goto bail;
  572. }
  573. while(bits_wanted--)
  574. ocfs2_set_bit(start++, bitmap);
  575. le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
  576. status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
  577. if (status < 0) {
  578. mlog_errno(status);
  579. goto bail;
  580. }
  581. status = 0;
  582. bail:
  583. mlog_exit(status);
  584. return status;
  585. }
  586. static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
  587. {
  588. int i;
  589. u8 *buffer;
  590. u32 count = 0;
  591. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  592. mlog_entry_void();
  593. buffer = la->la_bitmap;
  594. for (i = 0; i < le16_to_cpu(la->la_size); i++)
  595. count += hweight8(buffer[i]);
  596. mlog_exit(count);
  597. return count;
  598. }
  599. static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
  600. struct ocfs2_dinode *alloc,
  601. u32 numbits)
  602. {
  603. int numfound, bitoff, left, startoff, lastzero;
  604. void *bitmap = NULL;
  605. mlog_entry("(numbits wanted = %u)\n", numbits);
  606. if (!alloc->id1.bitmap1.i_total) {
  607. mlog(0, "No bits in my window!\n");
  608. bitoff = -1;
  609. goto bail;
  610. }
  611. bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
  612. numfound = bitoff = startoff = 0;
  613. lastzero = -1;
  614. left = le32_to_cpu(alloc->id1.bitmap1.i_total);
  615. while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
  616. if (bitoff == left) {
  617. /* mlog(0, "bitoff (%d) == left", bitoff); */
  618. break;
  619. }
  620. /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
  621. "numfound = %d\n", bitoff, startoff, numfound);*/
  622. /* Ok, we found a zero bit... is it contig. or do we
  623. * start over?*/
  624. if (bitoff == startoff) {
  625. /* we found a zero */
  626. numfound++;
  627. startoff++;
  628. } else {
  629. /* got a zero after some ones */
  630. numfound = 1;
  631. startoff = bitoff+1;
  632. }
  633. /* we got everything we needed */
  634. if (numfound == numbits) {
  635. /* mlog(0, "Found it all!\n"); */
  636. break;
  637. }
  638. }
  639. mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
  640. numfound);
  641. if (numfound == numbits)
  642. bitoff = startoff - numfound;
  643. else
  644. bitoff = -1;
  645. bail:
  646. mlog_exit(bitoff);
  647. return bitoff;
  648. }
  649. static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
  650. {
  651. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  652. int i;
  653. mlog_entry_void();
  654. alloc->id1.bitmap1.i_total = 0;
  655. alloc->id1.bitmap1.i_used = 0;
  656. la->la_bm_off = 0;
  657. for(i = 0; i < le16_to_cpu(la->la_size); i++)
  658. la->la_bitmap[i] = 0;
  659. mlog_exit_void();
  660. }
  #if 0
  /*
   * Debugging aid for window shifts (compiled out).  Enable this block
   * and its call sites to BUG() if any of the 'count' bits starting at
   * 'start' is set.  Bits are checked from the highest index down.
   */
  static void ocfs2_verify_zero_bits(unsigned long *bitmap,
  				   unsigned int start,
  				   unsigned int count)
  {
  	unsigned int idx;

  	for (idx = count; idx-- > 0; ) {
  		if (!ocfs2_test_bit(start + idx, bitmap))
  			continue;

  		printk("ocfs2_verify_zero_bits: start = %u, count = "
  		       "%u\n", start, count);
  		printk("ocfs2_verify_zero_bits: bit %u is set!",
  		       start + idx);
  		BUG();
  	}
  }
  #endif
  679. /*
  680. * sync the local alloc to main bitmap.
  681. *
  682. * assumes you've already locked the main bitmap -- the bitmap inode
  683. * passed is used for caching.
  684. */
  685. static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
  686. handle_t *handle,
  687. struct ocfs2_dinode *alloc,
  688. struct inode *main_bm_inode,
  689. struct buffer_head *main_bm_bh)
  690. {
  691. int status = 0;
  692. int bit_off, left, count, start;
  693. u64 la_start_blk;
  694. u64 blkno;
  695. void *bitmap;
  696. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  697. mlog_entry("total = %u, used = %u\n",
  698. le32_to_cpu(alloc->id1.bitmap1.i_total),
  699. le32_to_cpu(alloc->id1.bitmap1.i_used));
  700. if (!alloc->id1.bitmap1.i_total) {
  701. mlog(0, "nothing to sync!\n");
  702. goto bail;
  703. }
  704. if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
  705. le32_to_cpu(alloc->id1.bitmap1.i_total)) {
  706. mlog(0, "all bits were taken!\n");
  707. goto bail;
  708. }
  709. la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
  710. le32_to_cpu(la->la_bm_off));
  711. bitmap = la->la_bitmap;
  712. start = count = bit_off = 0;
  713. left = le32_to_cpu(alloc->id1.bitmap1.i_total);
  714. while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
  715. != -1) {
  716. if ((bit_off < left) && (bit_off == start)) {
  717. count++;
  718. start++;
  719. continue;
  720. }
  721. if (count) {
  722. blkno = la_start_blk +
  723. ocfs2_clusters_to_blocks(osb->sb,
  724. start - count);
  725. mlog(0, "freeing %u bits starting at local alloc bit "
  726. "%u (la_start_blk = %llu, blkno = %llu)\n",
  727. count, start - count,
  728. (unsigned long long)la_start_blk,
  729. (unsigned long long)blkno);
  730. status = ocfs2_free_clusters(handle, main_bm_inode,
  731. main_bm_bh, blkno, count);
  732. if (status < 0) {
  733. mlog_errno(status);
  734. goto bail;
  735. }
  736. }
  737. if (bit_off >= left)
  738. break;
  739. count = 1;
  740. start = bit_off + 1;
  741. }
  742. bail:
  743. mlog_exit(status);
  744. return status;
  745. }
  /* Events that drive recalculation of the local alloc window size. */
  enum ocfs2_la_event {
  	/* Normal window slide. */
  	OCFS2_LA_EVENT_SLIDE,

  	/*
  	 * The global bitmap has enough bits theoretically free, but a
  	 * contiguous allocation could not be found.
  	 */
  	OCFS2_LA_EVENT_FRAGMENTED,

  	/*
  	 * Global bitmap doesn't have enough bits free to satisfy our
  	 * request.
  	 */
  	OCFS2_LA_EVENT_ENOSPC,
  };
  757. #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
  758. /*
  759. * Given an event, calculate the size of our next local alloc window.
  760. *
  761. * This should always be called under i_mutex of the local alloc inode
  762. * so that local alloc disabling doesn't race with processes trying to
  763. * use the allocator.
  764. *
  765. * Returns the state which the local alloc was left in. This value can
  766. * be ignored by some paths.
  767. */
  768. static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
  769. enum ocfs2_la_event event)
  770. {
  771. unsigned int bits;
  772. int state;
  773. spin_lock(&osb->osb_lock);
  774. if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
  775. WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
  776. goto out_unlock;
  777. }
  778. /*
  779. * ENOSPC and fragmentation are treated similarly for now.
  780. */
  781. if (event == OCFS2_LA_EVENT_ENOSPC ||
  782. event == OCFS2_LA_EVENT_FRAGMENTED) {
  783. /*
  784. * We ran out of contiguous space in the primary
  785. * bitmap. Drastically reduce the number of bits used
  786. * by local alloc until we have to disable it.
  787. */
  788. bits = osb->local_alloc_bits >> 1;
  789. if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
  790. /*
  791. * By setting state to THROTTLED, we'll keep
  792. * the number of local alloc bits used down
  793. * until an event occurs which would give us
  794. * reason to assume the bitmap situation might
  795. * have changed.
  796. */
  797. osb->local_alloc_state = OCFS2_LA_THROTTLED;
  798. osb->local_alloc_bits = bits;
  799. } else {
  800. osb->local_alloc_state = OCFS2_LA_DISABLED;
  801. }
  802. queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
  803. OCFS2_LA_ENABLE_INTERVAL);
  804. goto out_unlock;
  805. }
  806. /*
  807. * Don't increase the size of the local alloc window until we
  808. * know we might be able to fulfill the request. Otherwise, we
  809. * risk bouncing around the global bitmap during periods of
  810. * low space.
  811. */
  812. if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
  813. osb->local_alloc_bits = osb->local_alloc_default_bits;
  814. out_unlock:
  815. state = osb->local_alloc_state;
  816. spin_unlock(&osb->osb_lock);
  817. return state;
  818. }
  819. static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
  820. struct ocfs2_alloc_context **ac,
  821. struct inode **bitmap_inode,
  822. struct buffer_head **bitmap_bh)
  823. {
  824. int status;
  825. *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
  826. if (!(*ac)) {
  827. status = -ENOMEM;
  828. mlog_errno(status);
  829. goto bail;
  830. }
  831. retry_enospc:
  832. (*ac)->ac_bits_wanted = osb->local_alloc_bits;
  833. status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
  834. if (status == -ENOSPC) {
  835. if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
  836. OCFS2_LA_DISABLED)
  837. goto bail;
  838. ocfs2_free_ac_resource(*ac);
  839. memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
  840. goto retry_enospc;
  841. }
  842. if (status < 0) {
  843. mlog_errno(status);
  844. goto bail;
  845. }
  846. *bitmap_inode = (*ac)->ac_inode;
  847. igrab(*bitmap_inode);
  848. *bitmap_bh = (*ac)->ac_bh;
  849. get_bh(*bitmap_bh);
  850. status = 0;
  851. bail:
  852. if ((status < 0) && *ac) {
  853. ocfs2_free_alloc_context(*ac);
  854. *ac = NULL;
  855. }
  856. mlog_exit(status);
  857. return status;
  858. }
  859. /*
  860. * pass it the bitmap lock in lock_bh if you have it.
  861. */
  862. static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
  863. handle_t *handle,
  864. struct ocfs2_alloc_context *ac)
  865. {
  866. int status = 0;
  867. u32 cluster_off, cluster_count;
  868. struct ocfs2_dinode *alloc = NULL;
  869. struct ocfs2_local_alloc *la;
  870. mlog_entry_void();
  871. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  872. la = OCFS2_LOCAL_ALLOC(alloc);
  873. if (alloc->id1.bitmap1.i_total)
  874. mlog(0, "asking me to alloc a new window over a non-empty "
  875. "one\n");
  876. mlog(0, "Allocating %u clusters for a new window.\n",
  877. osb->local_alloc_bits);
  878. /* Instruct the allocation code to try the most recently used
  879. * cluster group. We'll re-record the group used this pass
  880. * below. */
  881. ac->ac_last_group = osb->la_last_gd;
  882. /* we used the generic suballoc reserve function, but we set
  883. * everything up nicely, so there's no reason why we can't use
  884. * the more specific cluster api to claim bits. */
  885. status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
  886. &cluster_off, &cluster_count);
  887. if (status == -ENOSPC) {
  888. retry_enospc:
  889. /*
  890. * Note: We could also try syncing the journal here to
  891. * allow use of any free bits which the current
  892. * transaction can't give us access to. --Mark
  893. */
  894. if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
  895. OCFS2_LA_DISABLED)
  896. goto bail;
  897. status = ocfs2_claim_clusters(osb, handle, ac,
  898. osb->local_alloc_bits,
  899. &cluster_off,
  900. &cluster_count);
  901. if (status == -ENOSPC)
  902. goto retry_enospc;
  903. /*
  904. * We only shrunk the *minimum* number of in our
  905. * request - it's entirely possible that the allocator
  906. * might give us more than we asked for.
  907. */
  908. if (status == 0) {
  909. spin_lock(&osb->osb_lock);
  910. osb->local_alloc_bits = cluster_count;
  911. spin_unlock(&osb->osb_lock);
  912. }
  913. }
  914. if (status < 0) {
  915. if (status != -ENOSPC)
  916. mlog_errno(status);
  917. goto bail;
  918. }
  919. osb->la_last_gd = ac->ac_last_group;
  920. la->la_bm_off = cpu_to_le32(cluster_off);
  921. alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
  922. /* just in case... In the future when we find space ourselves,
  923. * we don't have to get all contiguous -- but we'll have to
  924. * set all previously used bits in bitmap and update
  925. * la_bits_set before setting the bits in the main bitmap. */
  926. alloc->id1.bitmap1.i_used = 0;
  927. memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
  928. le16_to_cpu(la->la_size));
  929. mlog(0, "New window allocated:\n");
  930. mlog(0, "window la_bm_off = %u\n",
  931. OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
  932. mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
  933. bail:
  934. mlog_exit(status);
  935. return status;
  936. }
  937. /* Note that we do *NOT* lock the local alloc inode here as
  938. * it's been locked already for us. */
  939. static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
  940. struct inode *local_alloc_inode)
  941. {
  942. int status = 0;
  943. struct buffer_head *main_bm_bh = NULL;
  944. struct inode *main_bm_inode = NULL;
  945. handle_t *handle = NULL;
  946. struct ocfs2_dinode *alloc;
  947. struct ocfs2_dinode *alloc_copy = NULL;
  948. struct ocfs2_alloc_context *ac = NULL;
  949. mlog_entry_void();
  950. ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
  951. /* This will lock the main bitmap for us. */
  952. status = ocfs2_local_alloc_reserve_for_window(osb,
  953. &ac,
  954. &main_bm_inode,
  955. &main_bm_bh);
  956. if (status < 0) {
  957. if (status != -ENOSPC)
  958. mlog_errno(status);
  959. goto bail;
  960. }
  961. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  962. if (IS_ERR(handle)) {
  963. status = PTR_ERR(handle);
  964. handle = NULL;
  965. mlog_errno(status);
  966. goto bail;
  967. }
  968. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  969. /* We want to clear the local alloc before doing anything
  970. * else, so that if we error later during this operation,
  971. * local alloc shutdown won't try to double free main bitmap
  972. * bits. Make a copy so the sync function knows which bits to
  973. * free. */
  974. alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
  975. if (!alloc_copy) {
  976. status = -ENOMEM;
  977. mlog_errno(status);
  978. goto bail;
  979. }
  980. memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
  981. status = ocfs2_journal_access_di(handle, local_alloc_inode,
  982. osb->local_alloc_bh,
  983. OCFS2_JOURNAL_ACCESS_WRITE);
  984. if (status < 0) {
  985. mlog_errno(status);
  986. goto bail;
  987. }
  988. ocfs2_clear_local_alloc(alloc);
  989. status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
  990. if (status < 0) {
  991. mlog_errno(status);
  992. goto bail;
  993. }
  994. status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
  995. main_bm_inode, main_bm_bh);
  996. if (status < 0) {
  997. mlog_errno(status);
  998. goto bail;
  999. }
  1000. status = ocfs2_local_alloc_new_window(osb, handle, ac);
  1001. if (status < 0) {
  1002. if (status != -ENOSPC)
  1003. mlog_errno(status);
  1004. goto bail;
  1005. }
  1006. atomic_inc(&osb->alloc_stats.moves);
  1007. status = 0;
  1008. bail:
  1009. if (handle)
  1010. ocfs2_commit_trans(osb, handle);
  1011. brelse(main_bm_bh);
  1012. if (main_bm_inode)
  1013. iput(main_bm_inode);
  1014. if (alloc_copy)
  1015. kfree(alloc_copy);
  1016. if (ac)
  1017. ocfs2_free_alloc_context(ac);
  1018. mlog_exit(status);
  1019. return status;
  1020. }