localalloc.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * localalloc.c
  5. *
  6. * Node local data allocation
  7. *
  8. * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2 of the License, or (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public
  21. * License along with this program; if not, write to the
  22. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23. * Boston, MA 02111-1307, USA.
  24. */
  25. #include <linux/fs.h>
  26. #include <linux/types.h>
  27. #include <linux/slab.h>
  28. #include <linux/highmem.h>
  29. #include <linux/bitops.h>
  30. #define MLOG_MASK_PREFIX ML_DISK_ALLOC
  31. #include <cluster/masklog.h>
  32. #include "ocfs2.h"
  33. #include "alloc.h"
  34. #include "blockcheck.h"
  35. #include "dlmglue.h"
  36. #include "inode.h"
  37. #include "journal.h"
  38. #include "localalloc.h"
  39. #include "suballoc.h"
  40. #include "super.h"
  41. #include "sysfile.h"
  42. #include "buffer_head_io.h"
  43. #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
  44. static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
  45. static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
  46. struct ocfs2_dinode *alloc,
  47. u32 numbits);
  48. static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
  49. static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
  50. handle_t *handle,
  51. struct ocfs2_dinode *alloc,
  52. struct inode *main_bm_inode,
  53. struct buffer_head *main_bm_bh);
  54. static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
  55. struct ocfs2_alloc_context **ac,
  56. struct inode **bitmap_inode,
  57. struct buffer_head **bitmap_bh);
  58. static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
  59. handle_t *handle,
  60. struct ocfs2_alloc_context *ac);
  61. static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
  62. struct inode *local_alloc_inode);
  63. static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
  64. {
  65. return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
  66. osb->local_alloc_state == OCFS2_LA_ENABLED);
  67. }
  68. void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
  69. unsigned int num_clusters)
  70. {
  71. spin_lock(&osb->osb_lock);
  72. if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
  73. osb->local_alloc_state == OCFS2_LA_THROTTLED)
  74. if (num_clusters >= osb->local_alloc_default_bits) {
  75. cancel_delayed_work(&osb->la_enable_wq);
  76. osb->local_alloc_state = OCFS2_LA_ENABLED;
  77. }
  78. spin_unlock(&osb->osb_lock);
  79. }
  80. void ocfs2_la_enable_worker(struct work_struct *work)
  81. {
  82. struct ocfs2_super *osb =
  83. container_of(work, struct ocfs2_super,
  84. la_enable_wq.work);
  85. spin_lock(&osb->osb_lock);
  86. osb->local_alloc_state = OCFS2_LA_ENABLED;
  87. spin_unlock(&osb->osb_lock);
  88. }
  89. /*
  90. * Tell us whether a given allocation should use the local alloc
  91. * file. Otherwise, it has to go to the main bitmap.
  92. *
  93. * This function does semi-dirty reads of local alloc size and state!
  94. * This is ok however, as the values are re-checked once under mutex.
  95. */
  96. int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
  97. {
  98. int ret = 0;
  99. int la_bits;
  100. spin_lock(&osb->osb_lock);
  101. la_bits = osb->local_alloc_bits;
  102. if (!ocfs2_la_state_enabled(osb))
  103. goto bail;
  104. /* la_bits should be at least twice the size (in clusters) of
  105. * a new block group. We want to be sure block group
  106. * allocations go through the local alloc, so allow an
  107. * allocation to take up to half the bitmap. */
  108. if (bits > (la_bits / 2))
  109. goto bail;
  110. ret = 1;
  111. bail:
  112. mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
  113. osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
  114. spin_unlock(&osb->osb_lock);
  115. return ret;
  116. }
  117. int ocfs2_load_local_alloc(struct ocfs2_super *osb)
  118. {
  119. int status = 0;
  120. struct ocfs2_dinode *alloc = NULL;
  121. struct buffer_head *alloc_bh = NULL;
  122. u32 num_used;
  123. struct inode *inode = NULL;
  124. struct ocfs2_local_alloc *la;
  125. mlog_entry_void();
  126. if (osb->local_alloc_bits == 0)
  127. goto bail;
  128. if (osb->local_alloc_bits >= osb->bitmap_cpg) {
  129. mlog(ML_NOTICE, "Requested local alloc window %d is larger "
  130. "than max possible %u. Using defaults.\n",
  131. osb->local_alloc_bits, (osb->bitmap_cpg - 1));
  132. osb->local_alloc_bits =
  133. ocfs2_megabytes_to_clusters(osb->sb,
  134. OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
  135. }
  136. /* read the alloc off disk */
  137. inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
  138. osb->slot_num);
  139. if (!inode) {
  140. status = -EINVAL;
  141. mlog_errno(status);
  142. goto bail;
  143. }
  144. status = ocfs2_read_inode_block_full(inode, &alloc_bh,
  145. OCFS2_BH_IGNORE_CACHE);
  146. if (status < 0) {
  147. mlog_errno(status);
  148. goto bail;
  149. }
  150. alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
  151. la = OCFS2_LOCAL_ALLOC(alloc);
  152. if (!(le32_to_cpu(alloc->i_flags) &
  153. (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
  154. mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
  155. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  156. status = -EINVAL;
  157. goto bail;
  158. }
  159. if ((la->la_size == 0) ||
  160. (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
  161. mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
  162. le16_to_cpu(la->la_size));
  163. status = -EINVAL;
  164. goto bail;
  165. }
  166. /* do a little verification. */
  167. num_used = ocfs2_local_alloc_count_bits(alloc);
  168. /* hopefully the local alloc has always been recovered before
  169. * we load it. */
  170. if (num_used
  171. || alloc->id1.bitmap1.i_used
  172. || alloc->id1.bitmap1.i_total
  173. || la->la_bm_off)
  174. mlog(ML_ERROR, "Local alloc hasn't been recovered!\n"
  175. "found = %u, set = %u, taken = %u, off = %u\n",
  176. num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
  177. le32_to_cpu(alloc->id1.bitmap1.i_total),
  178. OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
  179. osb->local_alloc_bh = alloc_bh;
  180. osb->local_alloc_state = OCFS2_LA_ENABLED;
  181. bail:
  182. if (status < 0)
  183. brelse(alloc_bh);
  184. if (inode)
  185. iput(inode);
  186. mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
  187. mlog_exit(status);
  188. return status;
  189. }
  190. /*
  191. * return any unused bits to the bitmap and write out a clean
  192. * local_alloc.
  193. *
  194. * local_alloc_bh is optional. If not passed, we will simply use the
  195. * one off osb. If you do pass it however, be warned that it *will* be
  196. * returned brelse'd and NULL'd out.*/
  197. void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
  198. {
  199. int status;
  200. handle_t *handle;
  201. struct inode *local_alloc_inode = NULL;
  202. struct buffer_head *bh = NULL;
  203. struct buffer_head *main_bm_bh = NULL;
  204. struct inode *main_bm_inode = NULL;
  205. struct ocfs2_dinode *alloc_copy = NULL;
  206. struct ocfs2_dinode *alloc = NULL;
  207. mlog_entry_void();
  208. cancel_delayed_work(&osb->la_enable_wq);
  209. flush_workqueue(ocfs2_wq);
  210. if (osb->local_alloc_state == OCFS2_LA_UNUSED)
  211. goto out;
  212. local_alloc_inode =
  213. ocfs2_get_system_file_inode(osb,
  214. LOCAL_ALLOC_SYSTEM_INODE,
  215. osb->slot_num);
  216. if (!local_alloc_inode) {
  217. status = -ENOENT;
  218. mlog_errno(status);
  219. goto out;
  220. }
  221. osb->local_alloc_state = OCFS2_LA_DISABLED;
  222. main_bm_inode = ocfs2_get_system_file_inode(osb,
  223. GLOBAL_BITMAP_SYSTEM_INODE,
  224. OCFS2_INVALID_SLOT);
  225. if (!main_bm_inode) {
  226. status = -EINVAL;
  227. mlog_errno(status);
  228. goto out;
  229. }
  230. mutex_lock(&main_bm_inode->i_mutex);
  231. status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
  232. if (status < 0) {
  233. mlog_errno(status);
  234. goto out_mutex;
  235. }
  236. /* WINDOW_MOVE_CREDITS is a bit heavy... */
  237. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  238. if (IS_ERR(handle)) {
  239. mlog_errno(PTR_ERR(handle));
  240. handle = NULL;
  241. goto out_unlock;
  242. }
  243. bh = osb->local_alloc_bh;
  244. alloc = (struct ocfs2_dinode *) bh->b_data;
  245. alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
  246. if (!alloc_copy) {
  247. status = -ENOMEM;
  248. goto out_commit;
  249. }
  250. memcpy(alloc_copy, alloc, bh->b_size);
  251. status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
  252. bh, OCFS2_JOURNAL_ACCESS_WRITE);
  253. if (status < 0) {
  254. mlog_errno(status);
  255. goto out_commit;
  256. }
  257. ocfs2_clear_local_alloc(alloc);
  258. status = ocfs2_journal_dirty(handle, bh);
  259. if (status < 0) {
  260. mlog_errno(status);
  261. goto out_commit;
  262. }
  263. brelse(bh);
  264. osb->local_alloc_bh = NULL;
  265. osb->local_alloc_state = OCFS2_LA_UNUSED;
  266. status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
  267. main_bm_inode, main_bm_bh);
  268. if (status < 0)
  269. mlog_errno(status);
  270. out_commit:
  271. ocfs2_commit_trans(osb, handle);
  272. out_unlock:
  273. brelse(main_bm_bh);
  274. ocfs2_inode_unlock(main_bm_inode, 1);
  275. out_mutex:
  276. mutex_unlock(&main_bm_inode->i_mutex);
  277. iput(main_bm_inode);
  278. out:
  279. if (local_alloc_inode)
  280. iput(local_alloc_inode);
  281. if (alloc_copy)
  282. kfree(alloc_copy);
  283. mlog_exit_void();
  284. }
  285. /*
  286. * We want to free the bitmap bits outside of any recovery context as
  287. * we'll need a cluster lock to do so, but we must clear the local
  288. * alloc before giving up the recovered nodes journal. To solve this,
  289. * we kmalloc a copy of the local alloc before it's change for the
  290. * caller to process with ocfs2_complete_local_alloc_recovery
  291. */
  292. int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
  293. int slot_num,
  294. struct ocfs2_dinode **alloc_copy)
  295. {
  296. int status = 0;
  297. struct buffer_head *alloc_bh = NULL;
  298. struct inode *inode = NULL;
  299. struct ocfs2_dinode *alloc;
  300. mlog_entry("(slot_num = %d)\n", slot_num);
  301. *alloc_copy = NULL;
  302. inode = ocfs2_get_system_file_inode(osb,
  303. LOCAL_ALLOC_SYSTEM_INODE,
  304. slot_num);
  305. if (!inode) {
  306. status = -EINVAL;
  307. mlog_errno(status);
  308. goto bail;
  309. }
  310. mutex_lock(&inode->i_mutex);
  311. status = ocfs2_read_inode_block_full(inode, &alloc_bh,
  312. OCFS2_BH_IGNORE_CACHE);
  313. if (status < 0) {
  314. mlog_errno(status);
  315. goto bail;
  316. }
  317. *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
  318. if (!(*alloc_copy)) {
  319. status = -ENOMEM;
  320. goto bail;
  321. }
  322. memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
  323. alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
  324. ocfs2_clear_local_alloc(alloc);
  325. ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
  326. status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode));
  327. if (status < 0)
  328. mlog_errno(status);
  329. bail:
  330. if ((status < 0) && (*alloc_copy)) {
  331. kfree(*alloc_copy);
  332. *alloc_copy = NULL;
  333. }
  334. brelse(alloc_bh);
  335. if (inode) {
  336. mutex_unlock(&inode->i_mutex);
  337. iput(inode);
  338. }
  339. mlog_exit(status);
  340. return status;
  341. }
  342. /*
  343. * Step 2: By now, we've completed the journal recovery, we've stamped
  344. * a clean local alloc on disk and dropped the node out of the
  345. * recovery map. Dlm locks will no longer stall, so lets clear out the
  346. * main bitmap.
  347. */
  348. int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
  349. struct ocfs2_dinode *alloc)
  350. {
  351. int status;
  352. handle_t *handle;
  353. struct buffer_head *main_bm_bh = NULL;
  354. struct inode *main_bm_inode;
  355. mlog_entry_void();
  356. main_bm_inode = ocfs2_get_system_file_inode(osb,
  357. GLOBAL_BITMAP_SYSTEM_INODE,
  358. OCFS2_INVALID_SLOT);
  359. if (!main_bm_inode) {
  360. status = -EINVAL;
  361. mlog_errno(status);
  362. goto out;
  363. }
  364. mutex_lock(&main_bm_inode->i_mutex);
  365. status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
  366. if (status < 0) {
  367. mlog_errno(status);
  368. goto out_mutex;
  369. }
  370. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  371. if (IS_ERR(handle)) {
  372. status = PTR_ERR(handle);
  373. handle = NULL;
  374. mlog_errno(status);
  375. goto out_unlock;
  376. }
  377. /* we want the bitmap change to be recorded on disk asap */
  378. handle->h_sync = 1;
  379. status = ocfs2_sync_local_to_main(osb, handle, alloc,
  380. main_bm_inode, main_bm_bh);
  381. if (status < 0)
  382. mlog_errno(status);
  383. ocfs2_commit_trans(osb, handle);
  384. out_unlock:
  385. ocfs2_inode_unlock(main_bm_inode, 1);
  386. out_mutex:
  387. mutex_unlock(&main_bm_inode->i_mutex);
  388. brelse(main_bm_bh);
  389. iput(main_bm_inode);
  390. out:
  391. if (!status)
  392. ocfs2_init_steal_slots(osb);
  393. mlog_exit(status);
  394. return status;
  395. }
  396. /* Check to see if the local alloc window is within ac->ac_max_block */
  397. static int ocfs2_local_alloc_in_range(struct inode *inode,
  398. struct ocfs2_alloc_context *ac,
  399. u32 bits_wanted)
  400. {
  401. struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  402. struct ocfs2_dinode *alloc;
  403. struct ocfs2_local_alloc *la;
  404. int start;
  405. u64 block_off;
  406. if (!ac->ac_max_block)
  407. return 1;
  408. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  409. la = OCFS2_LOCAL_ALLOC(alloc);
  410. start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
  411. if (start == -1) {
  412. mlog_errno(-ENOSPC);
  413. return 0;
  414. }
  415. /*
  416. * Converting (bm_off + start + bits_wanted) to blocks gives us
  417. * the blkno just past our actual allocation. This is perfect
  418. * to compare with ac_max_block.
  419. */
  420. block_off = ocfs2_clusters_to_blocks(inode->i_sb,
  421. le32_to_cpu(la->la_bm_off) +
  422. start + bits_wanted);
  423. mlog(0, "Checking %llu against %llu\n",
  424. (unsigned long long)block_off,
  425. (unsigned long long)ac->ac_max_block);
  426. if (block_off > ac->ac_max_block)
  427. return 0;
  428. return 1;
  429. }
  430. /*
  431. * make sure we've got at least bits_wanted contiguous bits in the
  432. * local alloc. You lose them when you drop i_mutex.
  433. *
  434. * We will add ourselves to the transaction passed in, but may start
  435. * our own in order to shift windows.
  436. */
  437. int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
  438. u32 bits_wanted,
  439. struct ocfs2_alloc_context *ac)
  440. {
  441. int status;
  442. struct ocfs2_dinode *alloc;
  443. struct inode *local_alloc_inode;
  444. unsigned int free_bits;
  445. mlog_entry_void();
  446. BUG_ON(!ac);
  447. local_alloc_inode =
  448. ocfs2_get_system_file_inode(osb,
  449. LOCAL_ALLOC_SYSTEM_INODE,
  450. osb->slot_num);
  451. if (!local_alloc_inode) {
  452. status = -ENOENT;
  453. mlog_errno(status);
  454. goto bail;
  455. }
  456. mutex_lock(&local_alloc_inode->i_mutex);
  457. /*
  458. * We must double check state and allocator bits because
  459. * another process may have changed them while holding i_mutex.
  460. */
  461. spin_lock(&osb->osb_lock);
  462. if (!ocfs2_la_state_enabled(osb) ||
  463. (bits_wanted > osb->local_alloc_bits)) {
  464. spin_unlock(&osb->osb_lock);
  465. status = -ENOSPC;
  466. goto bail;
  467. }
  468. spin_unlock(&osb->osb_lock);
  469. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  470. #ifdef CONFIG_OCFS2_DEBUG_FS
  471. if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
  472. ocfs2_local_alloc_count_bits(alloc)) {
  473. ocfs2_error(osb->sb, "local alloc inode %llu says it has "
  474. "%u free bits, but a count shows %u",
  475. (unsigned long long)le64_to_cpu(alloc->i_blkno),
  476. le32_to_cpu(alloc->id1.bitmap1.i_used),
  477. ocfs2_local_alloc_count_bits(alloc));
  478. status = -EIO;
  479. goto bail;
  480. }
  481. #endif
  482. free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
  483. le32_to_cpu(alloc->id1.bitmap1.i_used);
  484. if (bits_wanted > free_bits) {
  485. /* uhoh, window change time. */
  486. status =
  487. ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
  488. if (status < 0) {
  489. if (status != -ENOSPC)
  490. mlog_errno(status);
  491. goto bail;
  492. }
  493. /*
  494. * Under certain conditions, the window slide code
  495. * might have reduced the number of bits available or
  496. * disabled the the local alloc entirely. Re-check
  497. * here and return -ENOSPC if necessary.
  498. */
  499. status = -ENOSPC;
  500. if (!ocfs2_la_state_enabled(osb))
  501. goto bail;
  502. free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
  503. le32_to_cpu(alloc->id1.bitmap1.i_used);
  504. if (bits_wanted > free_bits)
  505. goto bail;
  506. }
  507. if (ac->ac_max_block)
  508. mlog(0, "Calling in_range for max block %llu\n",
  509. (unsigned long long)ac->ac_max_block);
  510. if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
  511. bits_wanted)) {
  512. /*
  513. * The window is outside ac->ac_max_block.
  514. * This errno tells the caller to keep localalloc enabled
  515. * but to get the allocation from the main bitmap.
  516. */
  517. status = -EFBIG;
  518. goto bail;
  519. }
  520. ac->ac_inode = local_alloc_inode;
  521. /* We should never use localalloc from another slot */
  522. ac->ac_alloc_slot = osb->slot_num;
  523. ac->ac_which = OCFS2_AC_USE_LOCAL;
  524. get_bh(osb->local_alloc_bh);
  525. ac->ac_bh = osb->local_alloc_bh;
  526. status = 0;
  527. bail:
  528. if (status < 0 && local_alloc_inode) {
  529. mutex_unlock(&local_alloc_inode->i_mutex);
  530. iput(local_alloc_inode);
  531. }
  532. mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
  533. status);
  534. mlog_exit(status);
  535. return status;
  536. }
  537. int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
  538. handle_t *handle,
  539. struct ocfs2_alloc_context *ac,
  540. u32 bits_wanted,
  541. u32 *bit_off,
  542. u32 *num_bits)
  543. {
  544. int status, start;
  545. struct inode *local_alloc_inode;
  546. void *bitmap;
  547. struct ocfs2_dinode *alloc;
  548. struct ocfs2_local_alloc *la;
  549. mlog_entry_void();
  550. BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
  551. local_alloc_inode = ac->ac_inode;
  552. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  553. la = OCFS2_LOCAL_ALLOC(alloc);
  554. start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
  555. if (start == -1) {
  556. /* TODO: Shouldn't we just BUG here? */
  557. status = -ENOSPC;
  558. mlog_errno(status);
  559. goto bail;
  560. }
  561. bitmap = la->la_bitmap;
  562. *bit_off = le32_to_cpu(la->la_bm_off) + start;
  563. /* local alloc is always contiguous by nature -- we never
  564. * delete bits from it! */
  565. *num_bits = bits_wanted;
  566. status = ocfs2_journal_access_di(handle,
  567. INODE_CACHE(local_alloc_inode),
  568. osb->local_alloc_bh,
  569. OCFS2_JOURNAL_ACCESS_WRITE);
  570. if (status < 0) {
  571. mlog_errno(status);
  572. goto bail;
  573. }
  574. while(bits_wanted--)
  575. ocfs2_set_bit(start++, bitmap);
  576. le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
  577. status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
  578. if (status < 0) {
  579. mlog_errno(status);
  580. goto bail;
  581. }
  582. status = 0;
  583. bail:
  584. mlog_exit(status);
  585. return status;
  586. }
  587. static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
  588. {
  589. int i;
  590. u8 *buffer;
  591. u32 count = 0;
  592. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  593. mlog_entry_void();
  594. buffer = la->la_bitmap;
  595. for (i = 0; i < le16_to_cpu(la->la_size); i++)
  596. count += hweight8(buffer[i]);
  597. mlog_exit(count);
  598. return count;
  599. }
  600. static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
  601. struct ocfs2_dinode *alloc,
  602. u32 numbits)
  603. {
  604. int numfound, bitoff, left, startoff, lastzero;
  605. void *bitmap = NULL;
  606. mlog_entry("(numbits wanted = %u)\n", numbits);
  607. if (!alloc->id1.bitmap1.i_total) {
  608. mlog(0, "No bits in my window!\n");
  609. bitoff = -1;
  610. goto bail;
  611. }
  612. bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
  613. numfound = bitoff = startoff = 0;
  614. lastzero = -1;
  615. left = le32_to_cpu(alloc->id1.bitmap1.i_total);
  616. while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
  617. if (bitoff == left) {
  618. /* mlog(0, "bitoff (%d) == left", bitoff); */
  619. break;
  620. }
  621. /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
  622. "numfound = %d\n", bitoff, startoff, numfound);*/
  623. /* Ok, we found a zero bit... is it contig. or do we
  624. * start over?*/
  625. if (bitoff == startoff) {
  626. /* we found a zero */
  627. numfound++;
  628. startoff++;
  629. } else {
  630. /* got a zero after some ones */
  631. numfound = 1;
  632. startoff = bitoff+1;
  633. }
  634. /* we got everything we needed */
  635. if (numfound == numbits) {
  636. /* mlog(0, "Found it all!\n"); */
  637. break;
  638. }
  639. }
  640. mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
  641. numfound);
  642. if (numfound == numbits)
  643. bitoff = startoff - numfound;
  644. else
  645. bitoff = -1;
  646. bail:
  647. mlog_exit(bitoff);
  648. return bitoff;
  649. }
  650. static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
  651. {
  652. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  653. int i;
  654. mlog_entry_void();
  655. alloc->id1.bitmap1.i_total = 0;
  656. alloc->id1.bitmap1.i_used = 0;
  657. la->la_bm_off = 0;
  658. for(i = 0; i < le16_to_cpu(la->la_size); i++)
  659. la->la_bitmap[i] = 0;
  660. mlog_exit_void();
  661. }
  #if 0
  /*
   * Debugging aid for window shifts, compiled out by default: BUG()s if
   * any of the 'count' bits beginning at 'start' is set. Bits are
   * checked from the highest down to 'start'.
   */
  static void ocfs2_verify_zero_bits(unsigned long *bitmap,
  				   unsigned int start,
  				   unsigned int count)
  {
  	unsigned int remaining;

  	for (remaining = count; remaining > 0; remaining--) {
  		unsigned int bit = start + remaining - 1;

  		if (!ocfs2_test_bit(bit, bitmap))
  			continue;

  		printk("ocfs2_verify_zero_bits: start = %u, count = "
  		       "%u\n", start, count);
  		printk("ocfs2_verify_zero_bits: bit %u is set!",
  		       bit);
  		BUG();
  	}
  }
  #endif
  680. /*
  681. * sync the local alloc to main bitmap.
  682. *
  683. * assumes you've already locked the main bitmap -- the bitmap inode
  684. * passed is used for caching.
  685. */
  686. static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
  687. handle_t *handle,
  688. struct ocfs2_dinode *alloc,
  689. struct inode *main_bm_inode,
  690. struct buffer_head *main_bm_bh)
  691. {
  692. int status = 0;
  693. int bit_off, left, count, start;
  694. u64 la_start_blk;
  695. u64 blkno;
  696. void *bitmap;
  697. struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
  698. mlog_entry("total = %u, used = %u\n",
  699. le32_to_cpu(alloc->id1.bitmap1.i_total),
  700. le32_to_cpu(alloc->id1.bitmap1.i_used));
  701. if (!alloc->id1.bitmap1.i_total) {
  702. mlog(0, "nothing to sync!\n");
  703. goto bail;
  704. }
  705. if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
  706. le32_to_cpu(alloc->id1.bitmap1.i_total)) {
  707. mlog(0, "all bits were taken!\n");
  708. goto bail;
  709. }
  710. la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
  711. le32_to_cpu(la->la_bm_off));
  712. bitmap = la->la_bitmap;
  713. start = count = bit_off = 0;
  714. left = le32_to_cpu(alloc->id1.bitmap1.i_total);
  715. while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
  716. != -1) {
  717. if ((bit_off < left) && (bit_off == start)) {
  718. count++;
  719. start++;
  720. continue;
  721. }
  722. if (count) {
  723. blkno = la_start_blk +
  724. ocfs2_clusters_to_blocks(osb->sb,
  725. start - count);
  726. mlog(0, "freeing %u bits starting at local alloc bit "
  727. "%u (la_start_blk = %llu, blkno = %llu)\n",
  728. count, start - count,
  729. (unsigned long long)la_start_blk,
  730. (unsigned long long)blkno);
  731. status = ocfs2_release_clusters(handle,
  732. main_bm_inode,
  733. main_bm_bh, blkno,
  734. count);
  735. if (status < 0) {
  736. mlog_errno(status);
  737. goto bail;
  738. }
  739. }
  740. if (bit_off >= left)
  741. break;
  742. count = 1;
  743. start = bit_off + 1;
  744. }
  745. bail:
  746. mlog_exit(status);
  747. return status;
  748. }
  /* Events that drive the sizing of the next local alloc window. */
  enum ocfs2_la_event {
  	/* Normal window slide. */
  	OCFS2_LA_EVENT_SLIDE,

  	/*
  	 * The global bitmap has enough bits theoretically free, but a
  	 * contiguous allocation could not be found.
  	 */
  	OCFS2_LA_EVENT_FRAGMENTED,

  	/*
  	 * Global bitmap doesn't have enough bits free to satisfy our
  	 * request.
  	 */
  	OCFS2_LA_EVENT_ENOSPC,
  };

  /* Delay used when queueing the la_enable_wq re-enable work. */
  #define OCFS2_LA_ENABLE_INTERVAL	(30 * HZ)
  761. /*
  762. * Given an event, calculate the size of our next local alloc window.
  763. *
  764. * This should always be called under i_mutex of the local alloc inode
  765. * so that local alloc disabling doesn't race with processes trying to
  766. * use the allocator.
  767. *
  768. * Returns the state which the local alloc was left in. This value can
  769. * be ignored by some paths.
  770. */
  771. static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
  772. enum ocfs2_la_event event)
  773. {
  774. unsigned int bits;
  775. int state;
  776. spin_lock(&osb->osb_lock);
  777. if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
  778. WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
  779. goto out_unlock;
  780. }
  781. /*
  782. * ENOSPC and fragmentation are treated similarly for now.
  783. */
  784. if (event == OCFS2_LA_EVENT_ENOSPC ||
  785. event == OCFS2_LA_EVENT_FRAGMENTED) {
  786. /*
  787. * We ran out of contiguous space in the primary
  788. * bitmap. Drastically reduce the number of bits used
  789. * by local alloc until we have to disable it.
  790. */
  791. bits = osb->local_alloc_bits >> 1;
  792. if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
  793. /*
  794. * By setting state to THROTTLED, we'll keep
  795. * the number of local alloc bits used down
  796. * until an event occurs which would give us
  797. * reason to assume the bitmap situation might
  798. * have changed.
  799. */
  800. osb->local_alloc_state = OCFS2_LA_THROTTLED;
  801. osb->local_alloc_bits = bits;
  802. } else {
  803. osb->local_alloc_state = OCFS2_LA_DISABLED;
  804. }
  805. queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
  806. OCFS2_LA_ENABLE_INTERVAL);
  807. goto out_unlock;
  808. }
  809. /*
  810. * Don't increase the size of the local alloc window until we
  811. * know we might be able to fulfill the request. Otherwise, we
  812. * risk bouncing around the global bitmap during periods of
  813. * low space.
  814. */
  815. if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
  816. osb->local_alloc_bits = osb->local_alloc_default_bits;
  817. out_unlock:
  818. state = osb->local_alloc_state;
  819. spin_unlock(&osb->osb_lock);
  820. return state;
  821. }
  822. static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
  823. struct ocfs2_alloc_context **ac,
  824. struct inode **bitmap_inode,
  825. struct buffer_head **bitmap_bh)
  826. {
  827. int status;
  828. *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
  829. if (!(*ac)) {
  830. status = -ENOMEM;
  831. mlog_errno(status);
  832. goto bail;
  833. }
  834. retry_enospc:
  835. (*ac)->ac_bits_wanted = osb->local_alloc_default_bits;
  836. status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
  837. if (status == -ENOSPC) {
  838. if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
  839. OCFS2_LA_DISABLED)
  840. goto bail;
  841. ocfs2_free_ac_resource(*ac);
  842. memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
  843. goto retry_enospc;
  844. }
  845. if (status < 0) {
  846. mlog_errno(status);
  847. goto bail;
  848. }
  849. *bitmap_inode = (*ac)->ac_inode;
  850. igrab(*bitmap_inode);
  851. *bitmap_bh = (*ac)->ac_bh;
  852. get_bh(*bitmap_bh);
  853. status = 0;
  854. bail:
  855. if ((status < 0) && *ac) {
  856. ocfs2_free_alloc_context(*ac);
  857. *ac = NULL;
  858. }
  859. mlog_exit(status);
  860. return status;
  861. }
  862. /*
  863. * pass it the bitmap lock in lock_bh if you have it.
  864. */
  865. static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
  866. handle_t *handle,
  867. struct ocfs2_alloc_context *ac)
  868. {
  869. int status = 0;
  870. u32 cluster_off, cluster_count;
  871. struct ocfs2_dinode *alloc = NULL;
  872. struct ocfs2_local_alloc *la;
  873. mlog_entry_void();
  874. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  875. la = OCFS2_LOCAL_ALLOC(alloc);
  876. if (alloc->id1.bitmap1.i_total)
  877. mlog(0, "asking me to alloc a new window over a non-empty "
  878. "one\n");
  879. mlog(0, "Allocating %u clusters for a new window.\n",
  880. osb->local_alloc_bits);
  881. /* Instruct the allocation code to try the most recently used
  882. * cluster group. We'll re-record the group used this pass
  883. * below. */
  884. ac->ac_last_group = osb->la_last_gd;
  885. /* we used the generic suballoc reserve function, but we set
  886. * everything up nicely, so there's no reason why we can't use
  887. * the more specific cluster api to claim bits. */
  888. status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
  889. &cluster_off, &cluster_count);
  890. if (status == -ENOSPC) {
  891. retry_enospc:
  892. /*
  893. * Note: We could also try syncing the journal here to
  894. * allow use of any free bits which the current
  895. * transaction can't give us access to. --Mark
  896. */
  897. if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
  898. OCFS2_LA_DISABLED)
  899. goto bail;
  900. ac->ac_bits_wanted = osb->local_alloc_default_bits;
  901. status = ocfs2_claim_clusters(osb, handle, ac,
  902. osb->local_alloc_bits,
  903. &cluster_off,
  904. &cluster_count);
  905. if (status == -ENOSPC)
  906. goto retry_enospc;
  907. /*
  908. * We only shrunk the *minimum* number of in our
  909. * request - it's entirely possible that the allocator
  910. * might give us more than we asked for.
  911. */
  912. if (status == 0) {
  913. spin_lock(&osb->osb_lock);
  914. osb->local_alloc_bits = cluster_count;
  915. spin_unlock(&osb->osb_lock);
  916. }
  917. }
  918. if (status < 0) {
  919. if (status != -ENOSPC)
  920. mlog_errno(status);
  921. goto bail;
  922. }
  923. osb->la_last_gd = ac->ac_last_group;
  924. la->la_bm_off = cpu_to_le32(cluster_off);
  925. alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
  926. /* just in case... In the future when we find space ourselves,
  927. * we don't have to get all contiguous -- but we'll have to
  928. * set all previously used bits in bitmap and update
  929. * la_bits_set before setting the bits in the main bitmap. */
  930. alloc->id1.bitmap1.i_used = 0;
  931. memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
  932. le16_to_cpu(la->la_size));
  933. mlog(0, "New window allocated:\n");
  934. mlog(0, "window la_bm_off = %u\n",
  935. OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
  936. mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
  937. bail:
  938. mlog_exit(status);
  939. return status;
  940. }
  941. /* Note that we do *NOT* lock the local alloc inode here as
  942. * it's been locked already for us. */
  943. static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
  944. struct inode *local_alloc_inode)
  945. {
  946. int status = 0;
  947. struct buffer_head *main_bm_bh = NULL;
  948. struct inode *main_bm_inode = NULL;
  949. handle_t *handle = NULL;
  950. struct ocfs2_dinode *alloc;
  951. struct ocfs2_dinode *alloc_copy = NULL;
  952. struct ocfs2_alloc_context *ac = NULL;
  953. mlog_entry_void();
  954. ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
  955. /* This will lock the main bitmap for us. */
  956. status = ocfs2_local_alloc_reserve_for_window(osb,
  957. &ac,
  958. &main_bm_inode,
  959. &main_bm_bh);
  960. if (status < 0) {
  961. if (status != -ENOSPC)
  962. mlog_errno(status);
  963. goto bail;
  964. }
  965. handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
  966. if (IS_ERR(handle)) {
  967. status = PTR_ERR(handle);
  968. handle = NULL;
  969. mlog_errno(status);
  970. goto bail;
  971. }
  972. alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
  973. /* We want to clear the local alloc before doing anything
  974. * else, so that if we error later during this operation,
  975. * local alloc shutdown won't try to double free main bitmap
  976. * bits. Make a copy so the sync function knows which bits to
  977. * free. */
  978. alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
  979. if (!alloc_copy) {
  980. status = -ENOMEM;
  981. mlog_errno(status);
  982. goto bail;
  983. }
  984. memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
  985. status = ocfs2_journal_access_di(handle,
  986. INODE_CACHE(local_alloc_inode),
  987. osb->local_alloc_bh,
  988. OCFS2_JOURNAL_ACCESS_WRITE);
  989. if (status < 0) {
  990. mlog_errno(status);
  991. goto bail;
  992. }
  993. ocfs2_clear_local_alloc(alloc);
  994. status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
  995. if (status < 0) {
  996. mlog_errno(status);
  997. goto bail;
  998. }
  999. status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
  1000. main_bm_inode, main_bm_bh);
  1001. if (status < 0) {
  1002. mlog_errno(status);
  1003. goto bail;
  1004. }
  1005. status = ocfs2_local_alloc_new_window(osb, handle, ac);
  1006. if (status < 0) {
  1007. if (status != -ENOSPC)
  1008. mlog_errno(status);
  1009. goto bail;
  1010. }
  1011. atomic_inc(&osb->alloc_stats.moves);
  1012. status = 0;
  1013. bail:
  1014. if (handle)
  1015. ocfs2_commit_trans(osb, handle);
  1016. brelse(main_bm_bh);
  1017. if (main_bm_inode)
  1018. iput(main_bm_inode);
  1019. if (alloc_copy)
  1020. kfree(alloc_copy);
  1021. if (ac)
  1022. ocfs2_free_alloc_context(ac);
  1023. mlog_exit(status);
  1024. return status;
  1025. }