localalloc.c 32 KB


  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * localalloc.c
  5. *
  6. * Node local data allocation
  7. *
  8. * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2 of the License, or (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public
  21. * License along with this program; if not, write to the
  22. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23. * Boston, MA 02111-1307, USA.
  24. */
  25. #include <linux/fs.h>
  26. #include <linux/types.h>
  27. #include <linux/slab.h>
  28. #include <linux/highmem.h>
  29. #include <linux/bitops.h>
  30. #include <linux/debugfs.h>
  31. #define MLOG_MASK_PREFIX ML_DISK_ALLOC
  32. #include <cluster/masklog.h>
  33. #include "ocfs2.h"
  34. #include "alloc.h"
  35. #include "dlmglue.h"
  36. #include "inode.h"
  37. #include "journal.h"
  38. #include "localalloc.h"
  39. #include "suballoc.h"
  40. #include "super.h"
  41. #include "sysfile.h"
  42. #include "buffer_head_io.h"
/* Yield a pointer to the local alloc area (id2.i_lab) embedded in a dinode. */
#define OCFS2_LOCAL_ALLOC(dinode)	(&((dinode)->id2.i_lab))

/*
 * Forward declarations of file-local helpers that are referenced
 * before their definitions.
 */

/* Count the bits set in the local alloc bitmap of @alloc. */
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);

/* Find @numbits contiguous clear bits; returns the bit offset or -1. */
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
					     struct ocfs2_dinode *alloc,
					     u32 numbits);

/* Reset the window counters, offset and bitmap of @alloc to zero. */
static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);

/* Give the clear bits of @alloc's window back to the main bitmap. */
static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
				    handle_t *handle,
				    struct ocfs2_dinode *alloc,
				    struct inode *main_bm_inode,
				    struct buffer_head *main_bm_bh);

/* Allocate an alloc context and reserve main bitmap bits for a window. */
static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
						struct ocfs2_alloc_context **ac,
						struct inode **bitmap_inode,
						struct buffer_head **bitmap_bh);

static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
					handle_t *handle,
					struct ocfs2_alloc_context *ac);

static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
					  struct inode *local_alloc_inode);
  63. #ifdef CONFIG_OCFS2_FS_STATS
  64. static int ocfs2_la_debug_open(struct inode *inode, struct file *file)
  65. {
  66. file->private_data = inode->i_private;
  67. return 0;
  68. }
  69. #define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE
  70. #define LA_DEBUG_VER 1
  71. static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf,
  72. size_t count, loff_t *ppos)
  73. {
  74. static DEFINE_MUTEX(la_debug_mutex);
  75. struct ocfs2_super *osb = file->private_data;
  76. int written, ret;
  77. char *buf = osb->local_alloc_debug_buf;
  78. mutex_lock(&la_debug_mutex);
  79. memset(buf, 0, LA_DEBUG_BUF_SZ);
  80. written = snprintf(buf, LA_DEBUG_BUF_SZ,
  81. "0x%x\t0x%llx\t%u\t%u\t0x%x\n",
  82. LA_DEBUG_VER,
  83. (unsigned long long)osb->la_last_gd,
  84. osb->local_alloc_default_bits,
  85. osb->local_alloc_bits, osb->local_alloc_state);
  86. ret = simple_read_from_buffer(userbuf, count, ppos, buf, written);
  87. mutex_unlock(&la_debug_mutex);
  88. return ret;
  89. }
  90. static const struct file_operations ocfs2_la_debug_fops = {
  91. .open = ocfs2_la_debug_open,
  92. .read = ocfs2_la_debug_read,
  93. };
  94. static void ocfs2_init_la_debug(struct ocfs2_super *osb)
  95. {
  96. osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS);
  97. if (!osb->local_alloc_debug_buf)
  98. return;
  99. osb->local_alloc_debug = debugfs_create_file("local_alloc_stats",
  100. S_IFREG|S_IRUSR,
  101. osb->osb_debug_root,
  102. osb,
  103. &ocfs2_la_debug_fops);
  104. if (!osb->local_alloc_debug) {
  105. kfree(osb->local_alloc_debug_buf);
  106. osb->local_alloc_debug_buf = NULL;
  107. }
  108. }
  109. static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
  110. {
  111. if (osb->local_alloc_debug)
  112. debugfs_remove(osb->local_alloc_debug);
  113. if (osb->local_alloc_debug_buf)
  114. kfree(osb->local_alloc_debug_buf);
  115. osb->local_alloc_debug_buf = NULL;
  116. osb->local_alloc_debug = NULL;
  117. }
  118. #else /* CONFIG_OCFS2_FS_STATS */
  119. static void ocfs2_init_la_debug(struct ocfs2_super *osb)
  120. {
  121. return;
  122. }
  123. static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
  124. {
  125. return;
  126. }
  127. #endif
  128. static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
  129. {
  130. return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
  131. osb->local_alloc_state == OCFS2_LA_ENABLED);
  132. }
  133. void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
  134. unsigned int num_clusters)
  135. {
  136. spin_lock(&osb->osb_lock);
  137. if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
  138. osb->local_alloc_state == OCFS2_LA_THROTTLED)
  139. if (num_clusters >= osb->local_alloc_default_bits) {
  140. cancel_delayed_work(&osb->la_enable_wq);
  141. osb->local_alloc_state = OCFS2_LA_ENABLED;
  142. }
  143. spin_unlock(&osb->osb_lock);
  144. }
  145. void ocfs2_la_enable_worker(struct work_struct *work)
  146. {
  147. struct ocfs2_super *osb =
  148. container_of(work, struct ocfs2_super,
  149. la_enable_wq.work);
  150. spin_lock(&osb->osb_lock);
  151. osb->local_alloc_state = OCFS2_LA_ENABLED;
  152. spin_unlock(&osb->osb_lock);
  153. }
  154. /*
  155. * Tell us whether a given allocation should use the local alloc
  156. * file. Otherwise, it has to go to the main bitmap.
  157. *
  158. * This function does semi-dirty reads of local alloc size and state!
  159. * This is ok however, as the values are re-checked once under mutex.
  160. */
  161. int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
  162. {
  163. int ret = 0;
  164. int la_bits;
  165. spin_lock(&osb->osb_lock);
  166. la_bits = osb->local_alloc_bits;
  167. if (!ocfs2_la_state_enabled(osb))
  168. goto bail;
  169. /* la_bits should be at least twice the size (in clusters) of
  170. * a new block group. We want to be sure block group
  171. * allocations go through the local alloc, so allow an
  172. * allocation to take up to half the bitmap. */
  173. if (bits > (la_bits / 2))
  174. goto bail;
  175. ret = 1;
  176. bail:
  177. mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
  178. osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
  179. spin_unlock(&osb->osb_lock);
  180. return ret;
  181. }
  182. int ocfs2_load_local_alloc(struct ocfs2_super *osb)
  183. {
  184. int status = 0;
  185. struct ocfs2_dinode *alloc = NULL;
  186. struct buffer_head *alloc_bh = NULL;
  187. u32 num_used;
  188. struct inode *inode = NULL;
  189. struct ocfs2_local_alloc *la;
  190. mlog_entry_void();
  191. ocfs2_init_la_debug(osb);
  192. if (osb->local_alloc_bits == 0)
  193. goto bail;
  194. if (osb->local_alloc_bits >= osb->bitmap_cpg) {
  195. mlog(ML_NOTICE, "Requested local alloc window %d is larger "
  196. "than max possible %u. Using defaults.\n",
  197. osb->local_alloc_bits, (osb->bitmap_cpg - 1));
  198. osb->local_alloc_bits =
  199. ocfs2_megabytes_to_clusters(osb->sb,
  200. OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
  201. }
  202. /* read the alloc off disk */
  203. inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
  204. osb->slot_num);
  205. if (!inode) {
  206. status = -EINVAL;
  207. mlog_errno(status);
  208. goto bail;
  209. }
  210. status = ocfs2_read_inode_block_full(inode, &alloc_bh,
  211. OCFS2_BH_IGNORE_CACHE);
  212. if (status < 0) {
  213. mlog_errno(status);
  214. goto bail;
  215. }
  216. alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
  217. la = OCFS2_LOCAL_ALLOC(alloc);
  218. if (!(le32_to_cpu(alloc->i_flags) &
  219. (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
  220. mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
  221. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  222. status = -EINVAL;
  223. goto bail;
  224. }
  225. if ((la->la_size == 0) ||
  226. (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
  227. mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
  228. le16_to_cpu(la->la_size));
  229. status = -EINVAL;
  230. goto bail;
  231. }
  232. /* do a little verification. */
  233. num_used = ocfs2_local_alloc_count_bits(alloc);
  234. /* hopefully the local alloc has always been recovered before
  235. * we load it. */
  236. if (num_used
  237. || alloc->id1.bitmap1.i_used
  238. || alloc->id1.bitmap1.i_total
  239. || la->la_bm_off)
  240. mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
  241. "found = %u, set = %u, taken = %u, off = %u\n",
  242. num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
  243. le32_to_cpu(alloc->id1.bitmap1.i_total),
  244. OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
  245. osb->local_alloc_bh = alloc_bh;
  246. osb->local_alloc_state = OCFS2_LA_ENABLED;
  247. bail:
  248. if (status < 0)
  249. brelse(alloc_bh);
  250. if (inode)
  251. iput(inode);
  252. if (status < 0)
  253. ocfs2_shutdown_la_debug(osb);
  254. mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
  255. mlog_exit(status);
  256. return status;
  257. }
  258. /*
  259. * return any unused bits to the bitmap and write out a clean
  260. * local_alloc.
  261. *
  262. * local_alloc_bh is optional. If not passed, we will simply use the
  263. * one off osb. If you do pass it however, be warned that it *will* be
  264. * returned brelse'd and NULL'd out.*/
  265. void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
  266. {
  267. int status;
  268. handle_t *handle;
  269. struct inode *local_alloc_inode = NULL;
  270. struct buffer_head *bh = NULL;
  271. struct buffer_head *main_bm_bh = NULL;
  272. struct inode *main_bm_inode = NULL;
  273. struct ocfs2_dinode *alloc_copy = NULL;
  274. struct ocfs2_dinode *alloc = NULL;
  275. mlog_entry_void();
  276. cancel_delayed_work(&osb->la_enable_wq);
  277. flush_workqueue(ocfs2_wq);
  278. ocfs2_shutdown_la_debug(osb);
  279. if (osb->local_alloc_state == OCFS2_LA_UNUSED)
  280. goto out;
  281. local_alloc_inode =
  282. ocfs2_get_system_file_inode(osb,
  283. LOCAL_ALLOC_SYSTEM_INODE,
  284. osb->slot_num);
  285. if (!local_alloc_inode) {
  286. status = -ENOENT;
  287. mlog_errno(status);
  288. goto out;
  289. }
  290. osb->local_alloc_state = OCFS2_LA_DISABLED;
  291. main_bm_inode = ocfs2_get_system_file_inode(osb,
  292. GLOBAL_BITMAP_SYSTEM_INODE,
  293. OCFS2_INVALID_SLOT);
  294. if (!main_bm_inode) {
  295. status = -EINVAL;
  296. mlog_errno(status);
  297. goto out;
  298. }
  299. mutex_lock(&main_bm_inode->i_mutex);
  300. status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
  301. if (status < 0) {
  302. mlog_errno(status);
  303. goto out_mutex;
  304. }
  305. /* WINDOW_MOVE_CREDITS is a bit heavy... */
  306. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  307. if (IS_ERR(handle)) {
  308. mlog_errno(PTR_ERR(handle));
  309. handle = NULL;
  310. goto out_unlock;
  311. }
  312. bh = osb->local_alloc_bh;
  313. alloc = (struct ocfs2_dinode *) bh->b_data;
  314. alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
  315. if (!alloc_copy) {
  316. status = -ENOMEM;
  317. goto out_commit;
  318. }
  319. memcpy(alloc_copy, alloc, bh->b_size);
  320. status = ocfs2_journal_access(handle, local_alloc_inode, bh,
  321. OCFS2_JOURNAL_ACCESS_WRITE);
  322. if (status < 0) {
  323. mlog_errno(status);
  324. goto out_commit;
  325. }
  326. ocfs2_clear_local_alloc(alloc);
  327. status = ocfs2_journal_dirty(handle, bh);
  328. if (status < 0) {
  329. mlog_errno(status);
  330. goto out_commit;
  331. }
  332. brelse(bh);
  333. osb->local_alloc_bh = NULL;
  334. osb->local_alloc_state = OCFS2_LA_UNUSED;
  335. status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
  336. main_bm_inode, main_bm_bh);
  337. if (status < 0)
  338. mlog_errno(status);
  339. out_commit:
  340. ocfs2_commit_trans(osb, handle);
  341. out_unlock:
  342. brelse(main_bm_bh);
  343. ocfs2_inode_unlock(main_bm_inode, 1);
  344. out_mutex:
  345. mutex_unlock(&main_bm_inode->i_mutex);
  346. iput(main_bm_inode);
  347. out:
  348. if (local_alloc_inode)
  349. iput(local_alloc_inode);
  350. if (alloc_copy)
  351. kfree(alloc_copy);
  352. mlog_exit_void();
  353. }
  354. /*
  355. * We want to free the bitmap bits outside of any recovery context as
  356. * we'll need a cluster lock to do so, but we must clear the local
  357. * alloc before giving up the recovered nodes journal. To solve this,
  358. * we kmalloc a copy of the local alloc before it's change for the
  359. * caller to process with ocfs2_complete_local_alloc_recovery
  360. */
  361. int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
  362. int slot_num,
  363. struct ocfs2_dinode **alloc_copy)
  364. {
  365. int status = 0;
  366. struct buffer_head *alloc_bh = NULL;
  367. struct inode *inode = NULL;
  368. struct ocfs2_dinode *alloc;
  369. mlog_entry("(slot_num = %d)\n", slot_num);
  370. *alloc_copy = NULL;
  371. inode = ocfs2_get_system_file_inode(osb,
  372. LOCAL_ALLOC_SYSTEM_INODE,
  373. slot_num);
  374. if (!inode) {
  375. status = -EINVAL;
  376. mlog_errno(status);
  377. goto bail;
  378. }
  379. mutex_lock(&inode->i_mutex);
  380. status = ocfs2_read_inode_block_full(inode, &alloc_bh,
  381. OCFS2_BH_IGNORE_CACHE);
  382. if (status < 0) {
  383. mlog_errno(status);
  384. goto bail;
  385. }
  386. *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
  387. if (!(*alloc_copy)) {
  388. status = -ENOMEM;
  389. goto bail;
  390. }
  391. memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
  392. alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
  393. ocfs2_clear_local_alloc(alloc);
  394. status = ocfs2_write_block(osb, alloc_bh, inode);
  395. if (status < 0)
  396. mlog_errno(status);
  397. bail:
  398. if ((status < 0) && (*alloc_copy)) {
  399. kfree(*alloc_copy);
  400. *alloc_copy = NULL;
  401. }
  402. brelse(alloc_bh);
  403. if (inode) {
  404. mutex_unlock(&inode->i_mutex);
  405. iput(inode);
  406. }
  407. mlog_exit(status);
  408. return status;
  409. }
  410. /*
  411. * Step 2: By now, we've completed the journal recovery, we've stamped
  412. * a clean local alloc on disk and dropped the node out of the
  413. * recovery map. Dlm locks will no longer stall, so lets clear out the
  414. * main bitmap.
  415. */
  416. int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
  417. struct ocfs2_dinode *alloc)
  418. {
  419. int status;
  420. handle_t *handle;
  421. struct buffer_head *main_bm_bh = NULL;
  422. struct inode *main_bm_inode;
  423. mlog_entry_void();
  424. main_bm_inode = ocfs2_get_system_file_inode(osb,
  425. GLOBAL_BITMAP_SYSTEM_INODE,
  426. OCFS2_INVALID_SLOT);
  427. if (!main_bm_inode) {
  428. status = -EINVAL;
  429. mlog_errno(status);
  430. goto out;
  431. }
  432. mutex_lock(&main_bm_inode->i_mutex);
  433. status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
  434. if (status < 0) {
  435. mlog_errno(status);
  436. goto out_mutex;
  437. }
  438. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  439. if (IS_ERR(handle)) {
  440. status = PTR_ERR(handle);
  441. handle = NULL;
  442. mlog_errno(status);
  443. goto out_unlock;
  444. }
  445. /* we want the bitmap change to be recorded on disk asap */
  446. handle->h_sync = 1;
  447. status = ocfs2_sync_local_to_main(osb, handle, alloc,
  448. main_bm_inode, main_bm_bh);
  449. if (status < 0)
  450. mlog_errno(status);
  451. ocfs2_commit_trans(osb, handle);
  452. out_unlock:
  453. ocfs2_inode_unlock(main_bm_inode, 1);
  454. out_mutex:
  455. mutex_unlock(&main_bm_inode->i_mutex);
  456. brelse(main_bm_bh);
  457. iput(main_bm_inode);
  458. out:
  459. if (!status)
  460. ocfs2_init_inode_steal_slot(osb);
  461. mlog_exit(status);
  462. return status;
  463. }
  464. /* Check to see if the local alloc window is within ac->ac_max_block */
  465. static int ocfs2_local_alloc_in_range(struct inode *inode,
  466. struct ocfs2_alloc_context *ac,
  467. u32 bits_wanted)
  468. {
  469. struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  470. struct ocfs2_dinode *alloc;
  471. struct ocfs2_local_alloc *la;
  472. int start;
  473. u64 block_off;
  474. if (!ac->ac_max_block)
  475. return 1;
  476. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  477. la = OCFS2_LOCAL_ALLOC(alloc);
  478. start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
  479. if (start == -1) {
  480. mlog_errno(-ENOSPC);
  481. return 0;
  482. }
  483. /*
  484. * Converting (bm_off + start + bits_wanted) to blocks gives us
  485. * the blkno just past our actual allocation. This is perfect
  486. * to compare with ac_max_block.
  487. */
  488. block_off = ocfs2_clusters_to_blocks(inode->i_sb,
  489. le32_to_cpu(la->la_bm_off) +
  490. start + bits_wanted);
  491. mlog(0, "Checking %llu against %llu\n",
  492. (unsigned long long)block_off,
  493. (unsigned long long)ac->ac_max_block);
  494. if (block_off > ac->ac_max_block)
  495. return 0;
  496. return 1;
  497. }
  498. /*
  499. * make sure we've got at least bits_wanted contiguous bits in the
  500. * local alloc. You lose them when you drop i_mutex.
  501. *
  502. * We will add ourselves to the transaction passed in, but may start
  503. * our own in order to shift windows.
  504. */
  505. int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
  506. u32 bits_wanted,
  507. struct ocfs2_alloc_context *ac)
  508. {
  509. int status;
  510. struct ocfs2_dinode *alloc;
  511. struct inode *local_alloc_inode;
  512. unsigned int free_bits;
  513. mlog_entry_void();
  514. BUG_ON(!ac);
  515. local_alloc_inode =
  516. ocfs2_get_system_file_inode(osb,
  517. LOCAL_ALLOC_SYSTEM_INODE,
  518. osb->slot_num);
  519. if (!local_alloc_inode) {
  520. status = -ENOENT;
  521. mlog_errno(status);
  522. goto bail;
  523. }
  524. mutex_lock(&local_alloc_inode->i_mutex);
  525. /*
  526. * We must double check state and allocator bits because
  527. * another process may have changed them while holding i_mutex.
  528. */
  529. spin_lock(&osb->osb_lock);
  530. if (!ocfs2_la_state_enabled(osb) ||
  531. (bits_wanted > osb->local_alloc_bits)) {
  532. spin_unlock(&osb->osb_lock);
  533. status = -ENOSPC;
  534. goto bail;
  535. }
  536. spin_unlock(&osb->osb_lock);
  537. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  538. #ifdef CONFIG_OCFS2_DEBUG_FS
  539. if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
  540. ocfs2_local_alloc_count_bits(alloc)) {
  541. ocfs2_error(osb->sb, "local alloc inode %llu says it has "
  542. "%u free bits, but a count shows %u",
  543. (unsigned long long)le64_to_cpu(alloc->i_blkno),
  544. le32_to_cpu(alloc->id1.bitmap1.i_used),
  545. ocfs2_local_alloc_count_bits(alloc));
  546. status = -EIO;
  547. goto bail;
  548. }
  549. #endif
  550. free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
  551. le32_to_cpu(alloc->id1.bitmap1.i_used);
  552. if (bits_wanted > free_bits) {
  553. /* uhoh, window change time. */
  554. status =
  555. ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
  556. if (status < 0) {
  557. if (status != -ENOSPC)
  558. mlog_errno(status);
  559. goto bail;
  560. }
  561. /*
  562. * Under certain conditions, the window slide code
  563. * might have reduced the number of bits available or
  564. * disabled the the local alloc entirely. Re-check
  565. * here and return -ENOSPC if necessary.
  566. */
  567. status = -ENOSPC;
  568. if (!ocfs2_la_state_enabled(osb))
  569. goto bail;
  570. free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
  571. le32_to_cpu(alloc->id1.bitmap1.i_used);
  572. if (bits_wanted > free_bits)
  573. goto bail;
  574. }
  575. if (ac->ac_max_block)
  576. mlog(0, "Calling in_range for max block %llu\n",
  577. (unsigned long long)ac->ac_max_block);
  578. if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
  579. bits_wanted)) {
  580. /*
  581. * The window is outside ac->ac_max_block.
  582. * This errno tells the caller to keep localalloc enabled
  583. * but to get the allocation from the main bitmap.
  584. */
  585. status = -EFBIG;
  586. goto bail;
  587. }
  588. ac->ac_inode = local_alloc_inode;
  589. /* We should never use localalloc from another slot */
  590. ac->ac_alloc_slot = osb->slot_num;
  591. ac->ac_which = OCFS2_AC_USE_LOCAL;
  592. get_bh(osb->local_alloc_bh);
  593. ac->ac_bh = osb->local_alloc_bh;
  594. status = 0;
  595. bail:
  596. if (status < 0 && local_alloc_inode) {
  597. mutex_unlock(&local_alloc_inode->i_mutex);
  598. iput(local_alloc_inode);
  599. }
  600. mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
  601. status);
  602. mlog_exit(status);
  603. return status;
  604. }
  605. int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
  606. handle_t *handle,
  607. struct ocfs2_alloc_context *ac,
  608. u32 bits_wanted,
  609. u32 *bit_off,
  610. u32 *num_bits)
  611. {
  612. int status, start;
  613. struct inode *local_alloc_inode;
  614. void *bitmap;
  615. struct ocfs2_dinode *alloc;
  616. struct ocfs2_local_alloc *la;
  617. mlog_entry_void();
  618. BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
  619. local_alloc_inode = ac->ac_inode;
  620. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  621. la = OCFS2_LOCAL_ALLOC(alloc);
  622. start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
  623. if (start == -1) {
  624. /* TODO: Shouldn't we just BUG here? */
  625. status = -ENOSPC;
  626. mlog_errno(status);
  627. goto bail;
  628. }
  629. bitmap = la->la_bitmap;
  630. *bit_off = le32_to_cpu(la->la_bm_off) + start;
  631. /* local alloc is always contiguous by nature -- we never
  632. * delete bits from it! */
  633. *num_bits = bits_wanted;
  634. status = ocfs2_journal_access(handle, local_alloc_inode,
  635. osb->local_alloc_bh,
  636. OCFS2_JOURNAL_ACCESS_WRITE);
  637. if (status < 0) {
  638. mlog_errno(status);
  639. goto bail;
  640. }
  641. while(bits_wanted--)
  642. ocfs2_set_bit(start++, bitmap);
  643. le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
  644. status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
  645. if (status < 0) {
  646. mlog_errno(status);
  647. goto bail;
  648. }
  649. status = 0;
  650. bail:
  651. mlog_exit(status);
  652. return status;
  653. }
  654. static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
  655. {
  656. int i;
  657. u8 *buffer;
  658. u32 count = 0;
  659. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  660. mlog_entry_void();
  661. buffer = la->la_bitmap;
  662. for (i = 0; i < le16_to_cpu(la->la_size); i++)
  663. count += hweight8(buffer[i]);
  664. mlog_exit(count);
  665. return count;
  666. }
  667. static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
  668. struct ocfs2_dinode *alloc,
  669. u32 numbits)
  670. {
  671. int numfound, bitoff, left, startoff, lastzero;
  672. void *bitmap = NULL;
  673. mlog_entry("(numbits wanted = %u)\n", numbits);
  674. if (!alloc->id1.bitmap1.i_total) {
  675. mlog(0, "No bits in my window!\n");
  676. bitoff = -1;
  677. goto bail;
  678. }
  679. bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
  680. numfound = bitoff = startoff = 0;
  681. lastzero = -1;
  682. left = le32_to_cpu(alloc->id1.bitmap1.i_total);
  683. while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
  684. if (bitoff == left) {
  685. /* mlog(0, "bitoff (%d) == left", bitoff); */
  686. break;
  687. }
  688. /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
  689. "numfound = %d\n", bitoff, startoff, numfound);*/
  690. /* Ok, we found a zero bit... is it contig. or do we
  691. * start over?*/
  692. if (bitoff == startoff) {
  693. /* we found a zero */
  694. numfound++;
  695. startoff++;
  696. } else {
  697. /* got a zero after some ones */
  698. numfound = 1;
  699. startoff = bitoff+1;
  700. }
  701. /* we got everything we needed */
  702. if (numfound == numbits) {
  703. /* mlog(0, "Found it all!\n"); */
  704. break;
  705. }
  706. }
  707. mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
  708. numfound);
  709. if (numfound == numbits)
  710. bitoff = startoff - numfound;
  711. else
  712. bitoff = -1;
  713. bail:
  714. mlog_exit(bitoff);
  715. return bitoff;
  716. }
  717. static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
  718. {
  719. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  720. int i;
  721. mlog_entry_void();
  722. alloc->id1.bitmap1.i_total = 0;
  723. alloc->id1.bitmap1.i_used = 0;
  724. la->la_bm_off = 0;
  725. for(i = 0; i < le16_to_cpu(la->la_size); i++)
  726. la->la_bitmap[i] = 0;
  727. mlog_exit_void();
  728. }
  729. #if 0
  730. /* turn this on and uncomment below to aid debugging window shifts. */
  731. static void ocfs2_verify_zero_bits(unsigned long *bitmap,
  732. unsigned int start,
  733. unsigned int count)
  734. {
  735. unsigned int tmp = count;
  736. while(tmp--) {
  737. if (ocfs2_test_bit(start + tmp, bitmap)) {
  738. printk("ocfs2_verify_zero_bits: start = %u, count = "
  739. "%u\n", start, count);
  740. printk("ocfs2_verify_zero_bits: bit %u is set!",
  741. start + tmp);
  742. BUG();
  743. }
  744. }
  745. }
  746. #endif
  747. /*
  748. * sync the local alloc to main bitmap.
  749. *
  750. * assumes you've already locked the main bitmap -- the bitmap inode
  751. * passed is used for caching.
  752. */
  753. static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
  754. handle_t *handle,
  755. struct ocfs2_dinode *alloc,
  756. struct inode *main_bm_inode,
  757. struct buffer_head *main_bm_bh)
  758. {
  759. int status = 0;
  760. int bit_off, left, count, start;
  761. u64 la_start_blk;
  762. u64 blkno;
  763. void *bitmap;
  764. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  765. mlog_entry("total = %u, used = %u\n",
  766. le32_to_cpu(alloc->id1.bitmap1.i_total),
  767. le32_to_cpu(alloc->id1.bitmap1.i_used));
  768. if (!alloc->id1.bitmap1.i_total) {
  769. mlog(0, "nothing to sync!\n");
  770. goto bail;
  771. }
  772. if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
  773. le32_to_cpu(alloc->id1.bitmap1.i_total)) {
  774. mlog(0, "all bits were taken!\n");
  775. goto bail;
  776. }
  777. la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
  778. le32_to_cpu(la->la_bm_off));
  779. bitmap = la->la_bitmap;
  780. start = count = bit_off = 0;
  781. left = le32_to_cpu(alloc->id1.bitmap1.i_total);
  782. while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
  783. != -1) {
  784. if ((bit_off < left) && (bit_off == start)) {
  785. count++;
  786. start++;
  787. continue;
  788. }
  789. if (count) {
  790. blkno = la_start_blk +
  791. ocfs2_clusters_to_blocks(osb->sb,
  792. start - count);
  793. mlog(0, "freeing %u bits starting at local alloc bit "
  794. "%u (la_start_blk = %llu, blkno = %llu)\n",
  795. count, start - count,
  796. (unsigned long long)la_start_blk,
  797. (unsigned long long)blkno);
  798. status = ocfs2_free_clusters(handle, main_bm_inode,
  799. main_bm_bh, blkno, count);
  800. if (status < 0) {
  801. mlog_errno(status);
  802. goto bail;
  803. }
  804. }
  805. if (bit_off >= left)
  806. break;
  807. count = 1;
  808. start = bit_off + 1;
  809. }
  810. bail:
  811. mlog_exit(status);
  812. return status;
  813. }
/*
 * Events reported to ocfs2_recalc_la_window() when the size of the
 * next local alloc window is being decided.
 */
enum ocfs2_la_event {
	OCFS2_LA_EVENT_SLIDE,		/* Normal window slide. */
	OCFS2_LA_EVENT_FRAGMENTED,	/* The global bitmap has
					 * enough bits theoretically
					 * free, but a contiguous
					 * allocation could not be
					 * found. */
	OCFS2_LA_EVENT_ENOSPC,		/* Global bitmap doesn't have
					 * enough bits free to satisfy
					 * our request. */
};

/*
 * Delay, in jiffies (30 seconds), passed to queue_delayed_work() for
 * la_enable_wq after the window has been throttled or disabled.
 */
#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
  826. /*
  827. * Given an event, calculate the size of our next local alloc window.
  828. *
  829. * This should always be called under i_mutex of the local alloc inode
  830. * so that local alloc disabling doesn't race with processes trying to
  831. * use the allocator.
  832. *
  833. * Returns the state which the local alloc was left in. This value can
  834. * be ignored by some paths.
  835. */
  836. static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
  837. enum ocfs2_la_event event)
  838. {
  839. unsigned int bits;
  840. int state;
  841. spin_lock(&osb->osb_lock);
  842. if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
  843. WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
  844. goto out_unlock;
  845. }
  846. /*
  847. * ENOSPC and fragmentation are treated similarly for now.
  848. */
  849. if (event == OCFS2_LA_EVENT_ENOSPC ||
  850. event == OCFS2_LA_EVENT_FRAGMENTED) {
  851. /*
  852. * We ran out of contiguous space in the primary
  853. * bitmap. Drastically reduce the number of bits used
  854. * by local alloc until we have to disable it.
  855. */
  856. bits = osb->local_alloc_bits >> 1;
  857. if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
  858. /*
  859. * By setting state to THROTTLED, we'll keep
  860. * the number of local alloc bits used down
  861. * until an event occurs which would give us
  862. * reason to assume the bitmap situation might
  863. * have changed.
  864. */
  865. osb->local_alloc_state = OCFS2_LA_THROTTLED;
  866. osb->local_alloc_bits = bits;
  867. } else {
  868. osb->local_alloc_state = OCFS2_LA_DISABLED;
  869. }
  870. queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
  871. OCFS2_LA_ENABLE_INTERVAL);
  872. goto out_unlock;
  873. }
  874. /*
  875. * Don't increase the size of the local alloc window until we
  876. * know we might be able to fulfill the request. Otherwise, we
  877. * risk bouncing around the global bitmap during periods of
  878. * low space.
  879. */
  880. if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
  881. osb->local_alloc_bits = osb->local_alloc_default_bits;
  882. out_unlock:
  883. state = osb->local_alloc_state;
  884. spin_unlock(&osb->osb_lock);
  885. return state;
  886. }
  887. static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
  888. struct ocfs2_alloc_context **ac,
  889. struct inode **bitmap_inode,
  890. struct buffer_head **bitmap_bh)
  891. {
  892. int status;
  893. *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
  894. if (!(*ac)) {
  895. status = -ENOMEM;
  896. mlog_errno(status);
  897. goto bail;
  898. }
  899. retry_enospc:
  900. (*ac)->ac_bits_wanted = osb->local_alloc_bits;
  901. status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
  902. if (status == -ENOSPC) {
  903. if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
  904. OCFS2_LA_DISABLED)
  905. goto bail;
  906. ocfs2_free_ac_resource(*ac);
  907. memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
  908. goto retry_enospc;
  909. }
  910. if (status < 0) {
  911. mlog_errno(status);
  912. goto bail;
  913. }
  914. *bitmap_inode = (*ac)->ac_inode;
  915. igrab(*bitmap_inode);
  916. *bitmap_bh = (*ac)->ac_bh;
  917. get_bh(*bitmap_bh);
  918. status = 0;
  919. bail:
  920. if ((status < 0) && *ac) {
  921. ocfs2_free_alloc_context(*ac);
  922. *ac = NULL;
  923. }
  924. mlog_exit(status);
  925. return status;
  926. }
  927. /*
  928. * pass it the bitmap lock in lock_bh if you have it.
  929. */
  930. static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
  931. handle_t *handle,
  932. struct ocfs2_alloc_context *ac)
  933. {
  934. int status = 0;
  935. u32 cluster_off, cluster_count;
  936. struct ocfs2_dinode *alloc = NULL;
  937. struct ocfs2_local_alloc *la;
  938. mlog_entry_void();
  939. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  940. la = OCFS2_LOCAL_ALLOC(alloc);
  941. if (alloc->id1.bitmap1.i_total)
  942. mlog(0, "asking me to alloc a new window over a non-empty "
  943. "one\n");
  944. mlog(0, "Allocating %u clusters for a new window.\n",
  945. osb->local_alloc_bits);
  946. /* Instruct the allocation code to try the most recently used
  947. * cluster group. We'll re-record the group used this pass
  948. * below. */
  949. ac->ac_last_group = osb->la_last_gd;
  950. /* we used the generic suballoc reserve function, but we set
  951. * everything up nicely, so there's no reason why we can't use
  952. * the more specific cluster api to claim bits. */
  953. status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
  954. &cluster_off, &cluster_count);
  955. if (status == -ENOSPC) {
  956. retry_enospc:
  957. /*
  958. * Note: We could also try syncing the journal here to
  959. * allow use of any free bits which the current
  960. * transaction can't give us access to. --Mark
  961. */
  962. if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
  963. OCFS2_LA_DISABLED)
  964. goto bail;
  965. status = ocfs2_claim_clusters(osb, handle, ac,
  966. osb->local_alloc_bits,
  967. &cluster_off,
  968. &cluster_count);
  969. if (status == -ENOSPC)
  970. goto retry_enospc;
  971. /*
  972. * We only shrunk the *minimum* number of in our
  973. * request - it's entirely possible that the allocator
  974. * might give us more than we asked for.
  975. */
  976. if (status == 0) {
  977. spin_lock(&osb->osb_lock);
  978. osb->local_alloc_bits = cluster_count;
  979. spin_unlock(&osb->osb_lock);
  980. }
  981. }
  982. if (status < 0) {
  983. if (status != -ENOSPC)
  984. mlog_errno(status);
  985. goto bail;
  986. }
  987. osb->la_last_gd = ac->ac_last_group;
  988. la->la_bm_off = cpu_to_le32(cluster_off);
  989. alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
  990. /* just in case... In the future when we find space ourselves,
  991. * we don't have to get all contiguous -- but we'll have to
  992. * set all previously used bits in bitmap and update
  993. * la_bits_set before setting the bits in the main bitmap. */
  994. alloc->id1.bitmap1.i_used = 0;
  995. memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
  996. le16_to_cpu(la->la_size));
  997. mlog(0, "New window allocated:\n");
  998. mlog(0, "window la_bm_off = %u\n",
  999. OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
  1000. mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
  1001. bail:
  1002. mlog_exit(status);
  1003. return status;
  1004. }
  1005. /* Note that we do *NOT* lock the local alloc inode here as
  1006. * it's been locked already for us. */
  1007. static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
  1008. struct inode *local_alloc_inode)
  1009. {
  1010. int status = 0;
  1011. struct buffer_head *main_bm_bh = NULL;
  1012. struct inode *main_bm_inode = NULL;
  1013. handle_t *handle = NULL;
  1014. struct ocfs2_dinode *alloc;
  1015. struct ocfs2_dinode *alloc_copy = NULL;
  1016. struct ocfs2_alloc_context *ac = NULL;
  1017. mlog_entry_void();
  1018. ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
  1019. /* This will lock the main bitmap for us. */
  1020. status = ocfs2_local_alloc_reserve_for_window(osb,
  1021. &ac,
  1022. &main_bm_inode,
  1023. &main_bm_bh);
  1024. if (status < 0) {
  1025. if (status != -ENOSPC)
  1026. mlog_errno(status);
  1027. goto bail;
  1028. }
  1029. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  1030. if (IS_ERR(handle)) {
  1031. status = PTR_ERR(handle);
  1032. handle = NULL;
  1033. mlog_errno(status);
  1034. goto bail;
  1035. }
  1036. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  1037. /* We want to clear the local alloc before doing anything
  1038. * else, so that if we error later during this operation,
  1039. * local alloc shutdown won't try to double free main bitmap
  1040. * bits. Make a copy so the sync function knows which bits to
  1041. * free. */
  1042. alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
  1043. if (!alloc_copy) {
  1044. status = -ENOMEM;
  1045. mlog_errno(status);
  1046. goto bail;
  1047. }
  1048. memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
  1049. status = ocfs2_journal_access(handle, local_alloc_inode,
  1050. osb->local_alloc_bh,
  1051. OCFS2_JOURNAL_ACCESS_WRITE);
  1052. if (status < 0) {
  1053. mlog_errno(status);
  1054. goto bail;
  1055. }
  1056. ocfs2_clear_local_alloc(alloc);
  1057. status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
  1058. if (status < 0) {
  1059. mlog_errno(status);
  1060. goto bail;
  1061. }
  1062. status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
  1063. main_bm_inode, main_bm_bh);
  1064. if (status < 0) {
  1065. mlog_errno(status);
  1066. goto bail;
  1067. }
  1068. status = ocfs2_local_alloc_new_window(osb, handle, ac);
  1069. if (status < 0) {
  1070. if (status != -ENOSPC)
  1071. mlog_errno(status);
  1072. goto bail;
  1073. }
  1074. atomic_inc(&osb->alloc_stats.moves);
  1075. status = 0;
  1076. bail:
  1077. if (handle)
  1078. ocfs2_commit_trans(osb, handle);
  1079. brelse(main_bm_bh);
  1080. if (main_bm_inode)
  1081. iput(main_bm_inode);
  1082. if (alloc_copy)
  1083. kfree(alloc_copy);
  1084. if (ac)
  1085. ocfs2_free_alloc_context(ac);
  1086. mlog_exit(status);
  1087. return status;
  1088. }