alloc.c 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * alloc.c
  5. *
  6. * Extent allocs and frees
  7. *
  8. * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2 of the License, or (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public
  21. * License along with this program; if not, write to the
  22. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23. * Boston, MA 02111-1307, USA.
  24. */
  25. #include <linux/fs.h>
  26. #include <linux/types.h>
  27. #include <linux/slab.h>
  28. #include <linux/highmem.h>
  29. #define MLOG_MASK_PREFIX ML_DISK_ALLOC
  30. #include <cluster/masklog.h>
  31. #include "ocfs2.h"
  32. #include "alloc.h"
  33. #include "dlmglue.h"
  34. #include "extent_map.h"
  35. #include "inode.h"
  36. #include "journal.h"
  37. #include "localalloc.h"
  38. #include "suballoc.h"
  39. #include "sysfile.h"
  40. #include "file.h"
  41. #include "super.h"
  42. #include "uptodate.h"
  43. #include "buffer_head_io.h"
  44. static int ocfs2_extent_contig(struct inode *inode,
  45. struct ocfs2_extent_rec *ext,
  46. u64 blkno);
  47. static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
  48. struct ocfs2_journal_handle *handle,
  49. struct inode *inode,
  50. int wanted,
  51. struct ocfs2_alloc_context *meta_ac,
  52. struct buffer_head *bhs[]);
  53. static int ocfs2_add_branch(struct ocfs2_super *osb,
  54. struct ocfs2_journal_handle *handle,
  55. struct inode *inode,
  56. struct buffer_head *fe_bh,
  57. struct buffer_head *eb_bh,
  58. struct buffer_head *last_eb_bh,
  59. struct ocfs2_alloc_context *meta_ac);
  60. static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
  61. struct ocfs2_journal_handle *handle,
  62. struct inode *inode,
  63. struct buffer_head *fe_bh,
  64. struct ocfs2_alloc_context *meta_ac,
  65. struct buffer_head **ret_new_eb_bh);
  66. static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
  67. struct ocfs2_journal_handle *handle,
  68. struct inode *inode,
  69. struct buffer_head *fe_bh,
  70. u64 blkno,
  71. u32 new_clusters);
  72. static int ocfs2_find_branch_target(struct ocfs2_super *osb,
  73. struct inode *inode,
  74. struct buffer_head *fe_bh,
  75. struct buffer_head **target_bh);
  76. static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
  77. struct inode *inode,
  78. struct ocfs2_dinode *fe,
  79. unsigned int new_i_clusters,
  80. struct buffer_head *old_last_eb,
  81. struct buffer_head **new_last_eb);
  82. static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
  83. static int ocfs2_extent_contig(struct inode *inode,
  84. struct ocfs2_extent_rec *ext,
  85. u64 blkno)
  86. {
  87. return blkno == (le64_to_cpu(ext->e_blkno) +
  88. ocfs2_clusters_to_blocks(inode->i_sb,
  89. le32_to_cpu(ext->e_clusters)));
  90. }
  91. /*
  92. * How many free extents have we got before we need more meta data?
  93. */
  94. int ocfs2_num_free_extents(struct ocfs2_super *osb,
  95. struct inode *inode,
  96. struct ocfs2_dinode *fe)
  97. {
  98. int retval;
  99. struct ocfs2_extent_list *el;
  100. struct ocfs2_extent_block *eb;
  101. struct buffer_head *eb_bh = NULL;
  102. mlog_entry_void();
  103. if (!OCFS2_IS_VALID_DINODE(fe)) {
  104. OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
  105. retval = -EIO;
  106. goto bail;
  107. }
  108. if (fe->i_last_eb_blk) {
  109. retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
  110. &eb_bh, OCFS2_BH_CACHED, inode);
  111. if (retval < 0) {
  112. mlog_errno(retval);
  113. goto bail;
  114. }
  115. eb = (struct ocfs2_extent_block *) eb_bh->b_data;
  116. el = &eb->h_list;
  117. } else
  118. el = &fe->id2.i_list;
  119. BUG_ON(el->l_tree_depth != 0);
  120. retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
  121. bail:
  122. if (eb_bh)
  123. brelse(eb_bh);
  124. mlog_exit(retval);
  125. return retval;
  126. }
  127. /* expects array to already be allocated
  128. *
  129. * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
  130. * l_count for you
  131. */
  132. static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
  133. struct ocfs2_journal_handle *handle,
  134. struct inode *inode,
  135. int wanted,
  136. struct ocfs2_alloc_context *meta_ac,
  137. struct buffer_head *bhs[])
  138. {
  139. int count, status, i;
  140. u16 suballoc_bit_start;
  141. u32 num_got;
  142. u64 first_blkno;
  143. struct ocfs2_extent_block *eb;
  144. mlog_entry_void();
  145. count = 0;
  146. while (count < wanted) {
  147. status = ocfs2_claim_metadata(osb,
  148. handle,
  149. meta_ac,
  150. wanted - count,
  151. &suballoc_bit_start,
  152. &num_got,
  153. &first_blkno);
  154. if (status < 0) {
  155. mlog_errno(status);
  156. goto bail;
  157. }
  158. for(i = count; i < (num_got + count); i++) {
  159. bhs[i] = sb_getblk(osb->sb, first_blkno);
  160. if (bhs[i] == NULL) {
  161. status = -EIO;
  162. mlog_errno(status);
  163. goto bail;
  164. }
  165. ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
  166. status = ocfs2_journal_access(handle, inode, bhs[i],
  167. OCFS2_JOURNAL_ACCESS_CREATE);
  168. if (status < 0) {
  169. mlog_errno(status);
  170. goto bail;
  171. }
  172. memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
  173. eb = (struct ocfs2_extent_block *) bhs[i]->b_data;
  174. /* Ok, setup the minimal stuff here. */
  175. strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
  176. eb->h_blkno = cpu_to_le64(first_blkno);
  177. eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
  178. #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
  179. /* we always use slot zero's suballocator */
  180. eb->h_suballoc_slot = 0;
  181. #else
  182. eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
  183. #endif
  184. eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
  185. eb->h_list.l_count =
  186. cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
  187. suballoc_bit_start++;
  188. first_blkno++;
  189. /* We'll also be dirtied by the caller, so
  190. * this isn't absolutely necessary. */
  191. status = ocfs2_journal_dirty(handle, bhs[i]);
  192. if (status < 0) {
  193. mlog_errno(status);
  194. goto bail;
  195. }
  196. }
  197. count += num_got;
  198. }
  199. status = 0;
  200. bail:
  201. if (status < 0) {
  202. for(i = 0; i < wanted; i++) {
  203. if (bhs[i])
  204. brelse(bhs[i]);
  205. bhs[i] = NULL;
  206. }
  207. }
  208. mlog_exit(status);
  209. return status;
  210. }
  211. /*
  212. * Add an entire tree branch to our inode. eb_bh is the extent block
  213. * to start at, if we don't want to start the branch at the dinode
  214. * structure.
  215. *
  216. * last_eb_bh is required as we have to update it's next_leaf pointer
  217. * for the new last extent block.
  218. *
  219. * the new branch will be 'empty' in the sense that every block will
  220. * contain a single record with e_clusters == 0.
  221. */
  222. static int ocfs2_add_branch(struct ocfs2_super *osb,
  223. struct ocfs2_journal_handle *handle,
  224. struct inode *inode,
  225. struct buffer_head *fe_bh,
  226. struct buffer_head *eb_bh,
  227. struct buffer_head *last_eb_bh,
  228. struct ocfs2_alloc_context *meta_ac)
  229. {
  230. int status, new_blocks, i;
  231. u64 next_blkno, new_last_eb_blk;
  232. struct buffer_head *bh;
  233. struct buffer_head **new_eb_bhs = NULL;
  234. struct ocfs2_dinode *fe;
  235. struct ocfs2_extent_block *eb;
  236. struct ocfs2_extent_list *eb_el;
  237. struct ocfs2_extent_list *el;
  238. mlog_entry_void();
  239. BUG_ON(!last_eb_bh);
  240. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  241. if (eb_bh) {
  242. eb = (struct ocfs2_extent_block *) eb_bh->b_data;
  243. el = &eb->h_list;
  244. } else
  245. el = &fe->id2.i_list;
  246. /* we never add a branch to a leaf. */
  247. BUG_ON(!el->l_tree_depth);
  248. new_blocks = le16_to_cpu(el->l_tree_depth);
  249. /* allocate the number of new eb blocks we need */
  250. new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
  251. GFP_KERNEL);
  252. if (!new_eb_bhs) {
  253. status = -ENOMEM;
  254. mlog_errno(status);
  255. goto bail;
  256. }
  257. status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks,
  258. meta_ac, new_eb_bhs);
  259. if (status < 0) {
  260. mlog_errno(status);
  261. goto bail;
  262. }
  263. /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
  264. * linked with the rest of the tree.
  265. * conversly, new_eb_bhs[0] is the new bottommost leaf.
  266. *
  267. * when we leave the loop, new_last_eb_blk will point to the
  268. * newest leaf, and next_blkno will point to the topmost extent
  269. * block. */
  270. next_blkno = new_last_eb_blk = 0;
  271. for(i = 0; i < new_blocks; i++) {
  272. bh = new_eb_bhs[i];
  273. eb = (struct ocfs2_extent_block *) bh->b_data;
  274. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  275. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
  276. status = -EIO;
  277. goto bail;
  278. }
  279. eb_el = &eb->h_list;
  280. status = ocfs2_journal_access(handle, inode, bh,
  281. OCFS2_JOURNAL_ACCESS_CREATE);
  282. if (status < 0) {
  283. mlog_errno(status);
  284. goto bail;
  285. }
  286. eb->h_next_leaf_blk = 0;
  287. eb_el->l_tree_depth = cpu_to_le16(i);
  288. eb_el->l_next_free_rec = cpu_to_le16(1);
  289. eb_el->l_recs[0].e_cpos = fe->i_clusters;
  290. eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno);
  291. eb_el->l_recs[0].e_clusters = cpu_to_le32(0);
  292. if (!eb_el->l_tree_depth)
  293. new_last_eb_blk = le64_to_cpu(eb->h_blkno);
  294. status = ocfs2_journal_dirty(handle, bh);
  295. if (status < 0) {
  296. mlog_errno(status);
  297. goto bail;
  298. }
  299. next_blkno = le64_to_cpu(eb->h_blkno);
  300. }
  301. /* This is a bit hairy. We want to update up to three blocks
  302. * here without leaving any of them in an inconsistent state
  303. * in case of error. We don't have to worry about
  304. * journal_dirty erroring as it won't unless we've aborted the
  305. * handle (in which case we would never be here) so reserving
  306. * the write with journal_access is all we need to do. */
  307. status = ocfs2_journal_access(handle, inode, last_eb_bh,
  308. OCFS2_JOURNAL_ACCESS_WRITE);
  309. if (status < 0) {
  310. mlog_errno(status);
  311. goto bail;
  312. }
  313. status = ocfs2_journal_access(handle, inode, fe_bh,
  314. OCFS2_JOURNAL_ACCESS_WRITE);
  315. if (status < 0) {
  316. mlog_errno(status);
  317. goto bail;
  318. }
  319. if (eb_bh) {
  320. status = ocfs2_journal_access(handle, inode, eb_bh,
  321. OCFS2_JOURNAL_ACCESS_WRITE);
  322. if (status < 0) {
  323. mlog_errno(status);
  324. goto bail;
  325. }
  326. }
  327. /* Link the new branch into the rest of the tree (el will
  328. * either be on the fe, or the extent block passed in. */
  329. i = le16_to_cpu(el->l_next_free_rec);
  330. el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
  331. el->l_recs[i].e_cpos = fe->i_clusters;
  332. el->l_recs[i].e_clusters = 0;
  333. le16_add_cpu(&el->l_next_free_rec, 1);
  334. /* fe needs a new last extent block pointer, as does the
  335. * next_leaf on the previously last-extent-block. */
  336. fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
  337. eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
  338. eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
  339. status = ocfs2_journal_dirty(handle, last_eb_bh);
  340. if (status < 0)
  341. mlog_errno(status);
  342. status = ocfs2_journal_dirty(handle, fe_bh);
  343. if (status < 0)
  344. mlog_errno(status);
  345. if (eb_bh) {
  346. status = ocfs2_journal_dirty(handle, eb_bh);
  347. if (status < 0)
  348. mlog_errno(status);
  349. }
  350. status = 0;
  351. bail:
  352. if (new_eb_bhs) {
  353. for (i = 0; i < new_blocks; i++)
  354. if (new_eb_bhs[i])
  355. brelse(new_eb_bhs[i]);
  356. kfree(new_eb_bhs);
  357. }
  358. mlog_exit(status);
  359. return status;
  360. }
  361. /*
  362. * adds another level to the allocation tree.
  363. * returns back the new extent block so you can add a branch to it
  364. * after this call.
  365. */
  366. static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
  367. struct ocfs2_journal_handle *handle,
  368. struct inode *inode,
  369. struct buffer_head *fe_bh,
  370. struct ocfs2_alloc_context *meta_ac,
  371. struct buffer_head **ret_new_eb_bh)
  372. {
  373. int status, i;
  374. struct buffer_head *new_eb_bh = NULL;
  375. struct ocfs2_dinode *fe;
  376. struct ocfs2_extent_block *eb;
  377. struct ocfs2_extent_list *fe_el;
  378. struct ocfs2_extent_list *eb_el;
  379. mlog_entry_void();
  380. status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac,
  381. &new_eb_bh);
  382. if (status < 0) {
  383. mlog_errno(status);
  384. goto bail;
  385. }
  386. eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
  387. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  388. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
  389. status = -EIO;
  390. goto bail;
  391. }
  392. eb_el = &eb->h_list;
  393. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  394. fe_el = &fe->id2.i_list;
  395. status = ocfs2_journal_access(handle, inode, new_eb_bh,
  396. OCFS2_JOURNAL_ACCESS_CREATE);
  397. if (status < 0) {
  398. mlog_errno(status);
  399. goto bail;
  400. }
  401. /* copy the fe data into the new extent block */
  402. eb_el->l_tree_depth = fe_el->l_tree_depth;
  403. eb_el->l_next_free_rec = fe_el->l_next_free_rec;
  404. for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
  405. eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos;
  406. eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters;
  407. eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno;
  408. }
  409. status = ocfs2_journal_dirty(handle, new_eb_bh);
  410. if (status < 0) {
  411. mlog_errno(status);
  412. goto bail;
  413. }
  414. status = ocfs2_journal_access(handle, inode, fe_bh,
  415. OCFS2_JOURNAL_ACCESS_WRITE);
  416. if (status < 0) {
  417. mlog_errno(status);
  418. goto bail;
  419. }
  420. /* update fe now */
  421. le16_add_cpu(&fe_el->l_tree_depth, 1);
  422. fe_el->l_recs[0].e_cpos = 0;
  423. fe_el->l_recs[0].e_blkno = eb->h_blkno;
  424. fe_el->l_recs[0].e_clusters = fe->i_clusters;
  425. for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
  426. fe_el->l_recs[i].e_cpos = 0;
  427. fe_el->l_recs[i].e_clusters = 0;
  428. fe_el->l_recs[i].e_blkno = 0;
  429. }
  430. fe_el->l_next_free_rec = cpu_to_le16(1);
  431. /* If this is our 1st tree depth shift, then last_eb_blk
  432. * becomes the allocated extent block */
  433. if (fe_el->l_tree_depth == cpu_to_le16(1))
  434. fe->i_last_eb_blk = eb->h_blkno;
  435. status = ocfs2_journal_dirty(handle, fe_bh);
  436. if (status < 0) {
  437. mlog_errno(status);
  438. goto bail;
  439. }
  440. *ret_new_eb_bh = new_eb_bh;
  441. new_eb_bh = NULL;
  442. status = 0;
  443. bail:
  444. if (new_eb_bh)
  445. brelse(new_eb_bh);
  446. mlog_exit(status);
  447. return status;
  448. }
  449. /*
  450. * Expects the tree to already have room in the rightmost leaf for the
  451. * extent. Updates all the extent blocks (and the dinode) on the way
  452. * down.
  453. */
  454. static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
  455. struct ocfs2_journal_handle *handle,
  456. struct inode *inode,
  457. struct buffer_head *fe_bh,
  458. u64 start_blk,
  459. u32 new_clusters)
  460. {
  461. int status, i, num_bhs = 0;
  462. u64 next_blkno;
  463. u16 next_free;
  464. struct buffer_head **eb_bhs = NULL;
  465. struct ocfs2_dinode *fe;
  466. struct ocfs2_extent_block *eb;
  467. struct ocfs2_extent_list *el;
  468. mlog_entry_void();
  469. status = ocfs2_journal_access(handle, inode, fe_bh,
  470. OCFS2_JOURNAL_ACCESS_WRITE);
  471. if (status < 0) {
  472. mlog_errno(status);
  473. goto bail;
  474. }
  475. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  476. el = &fe->id2.i_list;
  477. if (el->l_tree_depth) {
  478. /* This is another operation where we want to be
  479. * careful about our tree updates. An error here means
  480. * none of the previous changes we made should roll
  481. * forward. As a result, we have to record the buffers
  482. * for this part of the tree in an array and reserve a
  483. * journal write to them before making any changes. */
  484. num_bhs = le16_to_cpu(fe->id2.i_list.l_tree_depth);
  485. eb_bhs = kcalloc(num_bhs, sizeof(struct buffer_head *),
  486. GFP_KERNEL);
  487. if (!eb_bhs) {
  488. status = -ENOMEM;
  489. mlog_errno(status);
  490. goto bail;
  491. }
  492. i = 0;
  493. while(el->l_tree_depth) {
  494. next_free = le16_to_cpu(el->l_next_free_rec);
  495. if (next_free == 0) {
  496. ocfs2_error(inode->i_sb,
  497. "Dinode %llu has a bad extent list",
  498. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  499. status = -EIO;
  500. goto bail;
  501. }
  502. next_blkno = le64_to_cpu(el->l_recs[next_free - 1].e_blkno);
  503. BUG_ON(i >= num_bhs);
  504. status = ocfs2_read_block(osb, next_blkno, &eb_bhs[i],
  505. OCFS2_BH_CACHED, inode);
  506. if (status < 0) {
  507. mlog_errno(status);
  508. goto bail;
  509. }
  510. eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
  511. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  512. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
  513. eb);
  514. status = -EIO;
  515. goto bail;
  516. }
  517. status = ocfs2_journal_access(handle, inode, eb_bhs[i],
  518. OCFS2_JOURNAL_ACCESS_WRITE);
  519. if (status < 0) {
  520. mlog_errno(status);
  521. goto bail;
  522. }
  523. el = &eb->h_list;
  524. i++;
  525. /* When we leave this loop, eb_bhs[num_bhs - 1] will
  526. * hold the bottom-most leaf extent block. */
  527. }
  528. BUG_ON(el->l_tree_depth);
  529. el = &fe->id2.i_list;
  530. /* If we have tree depth, then the fe update is
  531. * trivial, and we want to switch el out for the
  532. * bottom-most leaf in order to update it with the
  533. * actual extent data below. */
  534. next_free = le16_to_cpu(el->l_next_free_rec);
  535. if (next_free == 0) {
  536. ocfs2_error(inode->i_sb,
  537. "Dinode %llu has a bad extent list",
  538. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  539. status = -EIO;
  540. goto bail;
  541. }
  542. le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
  543. new_clusters);
  544. /* (num_bhs - 1) to avoid the leaf */
  545. for(i = 0; i < (num_bhs - 1); i++) {
  546. eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
  547. el = &eb->h_list;
  548. /* finally, make our actual change to the
  549. * intermediate extent blocks. */
  550. next_free = le16_to_cpu(el->l_next_free_rec);
  551. le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
  552. new_clusters);
  553. status = ocfs2_journal_dirty(handle, eb_bhs[i]);
  554. if (status < 0)
  555. mlog_errno(status);
  556. }
  557. BUG_ON(i != (num_bhs - 1));
  558. /* note that the leaf block wasn't touched in
  559. * the loop above */
  560. eb = (struct ocfs2_extent_block *) eb_bhs[num_bhs - 1]->b_data;
  561. el = &eb->h_list;
  562. BUG_ON(el->l_tree_depth);
  563. }
  564. /* yay, we can finally add the actual extent now! */
  565. i = le16_to_cpu(el->l_next_free_rec) - 1;
  566. if (le16_to_cpu(el->l_next_free_rec) &&
  567. ocfs2_extent_contig(inode, &el->l_recs[i], start_blk)) {
  568. le32_add_cpu(&el->l_recs[i].e_clusters, new_clusters);
  569. } else if (le16_to_cpu(el->l_next_free_rec) &&
  570. (le32_to_cpu(el->l_recs[i].e_clusters) == 0)) {
  571. /* having an empty extent at eof is legal. */
  572. if (el->l_recs[i].e_cpos != fe->i_clusters) {
  573. ocfs2_error(inode->i_sb,
  574. "Dinode %llu trailing extent is bad: "
  575. "cpos (%u) != number of clusters (%u)",
  576. (unsigned long long)OCFS2_I(inode)->ip_blkno,
  577. le32_to_cpu(el->l_recs[i].e_cpos),
  578. le32_to_cpu(fe->i_clusters));
  579. status = -EIO;
  580. goto bail;
  581. }
  582. el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
  583. el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
  584. } else {
  585. /* No contiguous record, or no empty record at eof, so
  586. * we add a new one. */
  587. BUG_ON(le16_to_cpu(el->l_next_free_rec) >=
  588. le16_to_cpu(el->l_count));
  589. i = le16_to_cpu(el->l_next_free_rec);
  590. el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
  591. el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
  592. el->l_recs[i].e_cpos = fe->i_clusters;
  593. le16_add_cpu(&el->l_next_free_rec, 1);
  594. }
  595. /*
  596. * extent_map errors are not fatal, so they are ignored outside
  597. * of flushing the thing.
  598. */
  599. status = ocfs2_extent_map_append(inode, &el->l_recs[i],
  600. new_clusters);
  601. if (status) {
  602. mlog_errno(status);
  603. ocfs2_extent_map_drop(inode, le32_to_cpu(fe->i_clusters));
  604. }
  605. status = ocfs2_journal_dirty(handle, fe_bh);
  606. if (status < 0)
  607. mlog_errno(status);
  608. if (fe->id2.i_list.l_tree_depth) {
  609. status = ocfs2_journal_dirty(handle, eb_bhs[num_bhs - 1]);
  610. if (status < 0)
  611. mlog_errno(status);
  612. }
  613. status = 0;
  614. bail:
  615. if (eb_bhs) {
  616. for (i = 0; i < num_bhs; i++)
  617. if (eb_bhs[i])
  618. brelse(eb_bhs[i]);
  619. kfree(eb_bhs);
  620. }
  621. mlog_exit(status);
  622. return status;
  623. }
  624. /*
  625. * Should only be called when there is no space left in any of the
  626. * leaf nodes. What we want to do is find the lowest tree depth
  627. * non-leaf extent block with room for new records. There are three
  628. * valid results of this search:
  629. *
  630. * 1) a lowest extent block is found, then we pass it back in
  631. * *lowest_eb_bh and return '0'
  632. *
  633. * 2) the search fails to find anything, but the dinode has room. We
  634. * pass NULL back in *lowest_eb_bh, but still return '0'
  635. *
  636. * 3) the search fails to find anything AND the dinode is full, in
  637. * which case we return > 0
  638. *
  639. * return status < 0 indicates an error.
  640. */
  641. static int ocfs2_find_branch_target(struct ocfs2_super *osb,
  642. struct inode *inode,
  643. struct buffer_head *fe_bh,
  644. struct buffer_head **target_bh)
  645. {
  646. int status = 0, i;
  647. u64 blkno;
  648. struct ocfs2_dinode *fe;
  649. struct ocfs2_extent_block *eb;
  650. struct ocfs2_extent_list *el;
  651. struct buffer_head *bh = NULL;
  652. struct buffer_head *lowest_bh = NULL;
  653. mlog_entry_void();
  654. *target_bh = NULL;
  655. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  656. el = &fe->id2.i_list;
  657. while(le16_to_cpu(el->l_tree_depth) > 1) {
  658. if (le16_to_cpu(el->l_next_free_rec) == 0) {
  659. ocfs2_error(inode->i_sb, "Dinode %llu has empty "
  660. "extent list (next_free_rec == 0)",
  661. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  662. status = -EIO;
  663. goto bail;
  664. }
  665. i = le16_to_cpu(el->l_next_free_rec) - 1;
  666. blkno = le64_to_cpu(el->l_recs[i].e_blkno);
  667. if (!blkno) {
  668. ocfs2_error(inode->i_sb, "Dinode %llu has extent "
  669. "list where extent # %d has no physical "
  670. "block start",
  671. (unsigned long long)OCFS2_I(inode)->ip_blkno, i);
  672. status = -EIO;
  673. goto bail;
  674. }
  675. if (bh) {
  676. brelse(bh);
  677. bh = NULL;
  678. }
  679. status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED,
  680. inode);
  681. if (status < 0) {
  682. mlog_errno(status);
  683. goto bail;
  684. }
  685. eb = (struct ocfs2_extent_block *) bh->b_data;
  686. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  687. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
  688. status = -EIO;
  689. goto bail;
  690. }
  691. el = &eb->h_list;
  692. if (le16_to_cpu(el->l_next_free_rec) <
  693. le16_to_cpu(el->l_count)) {
  694. if (lowest_bh)
  695. brelse(lowest_bh);
  696. lowest_bh = bh;
  697. get_bh(lowest_bh);
  698. }
  699. }
  700. /* If we didn't find one and the fe doesn't have any room,
  701. * then return '1' */
  702. if (!lowest_bh
  703. && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
  704. status = 1;
  705. *target_bh = lowest_bh;
  706. bail:
  707. if (bh)
  708. brelse(bh);
  709. mlog_exit(status);
  710. return status;
  711. }
  712. /* the caller needs to update fe->i_clusters */
  713. int ocfs2_insert_extent(struct ocfs2_super *osb,
  714. struct ocfs2_journal_handle *handle,
  715. struct inode *inode,
  716. struct buffer_head *fe_bh,
  717. u64 start_blk,
  718. u32 new_clusters,
  719. struct ocfs2_alloc_context *meta_ac)
  720. {
  721. int status, i, shift;
  722. struct buffer_head *last_eb_bh = NULL;
  723. struct buffer_head *bh = NULL;
  724. struct ocfs2_dinode *fe;
  725. struct ocfs2_extent_block *eb;
  726. struct ocfs2_extent_list *el;
  727. mlog_entry_void();
  728. mlog(0, "add %u clusters starting at block %llu to inode %llu\n",
  729. new_clusters, (unsigned long long)start_blk,
  730. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  731. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  732. el = &fe->id2.i_list;
  733. if (el->l_tree_depth) {
  734. /* jump to end of tree */
  735. status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
  736. &last_eb_bh, OCFS2_BH_CACHED, inode);
  737. if (status < 0) {
  738. mlog_exit(status);
  739. goto bail;
  740. }
  741. eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
  742. el = &eb->h_list;
  743. }
  744. /* Can we allocate without adding/shifting tree bits? */
  745. i = le16_to_cpu(el->l_next_free_rec) - 1;
  746. if (le16_to_cpu(el->l_next_free_rec) == 0
  747. || (le16_to_cpu(el->l_next_free_rec) < le16_to_cpu(el->l_count))
  748. || le32_to_cpu(el->l_recs[i].e_clusters) == 0
  749. || ocfs2_extent_contig(inode, &el->l_recs[i], start_blk))
  750. goto out_add;
  751. mlog(0, "ocfs2_allocate_extent: couldn't do a simple add, traversing "
  752. "tree now.\n");
  753. shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh);
  754. if (shift < 0) {
  755. status = shift;
  756. mlog_errno(status);
  757. goto bail;
  758. }
  759. /* We traveled all the way to the bottom of the allocation tree
  760. * and didn't find room for any more extents - we need to add
  761. * another tree level */
  762. if (shift) {
  763. /* if we hit a leaf, we'd better be empty :) */
  764. BUG_ON(le16_to_cpu(el->l_next_free_rec) !=
  765. le16_to_cpu(el->l_count));
  766. BUG_ON(bh);
  767. mlog(0, "ocfs2_allocate_extent: need to shift tree depth "
  768. "(current = %u)\n",
  769. le16_to_cpu(fe->id2.i_list.l_tree_depth));
  770. /* ocfs2_shift_tree_depth will return us a buffer with
  771. * the new extent block (so we can pass that to
  772. * ocfs2_add_branch). */
  773. status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh,
  774. meta_ac, &bh);
  775. if (status < 0) {
  776. mlog_errno(status);
  777. goto bail;
  778. }
  779. /* Special case: we have room now if we shifted from
  780. * tree_depth 0 */
  781. if (fe->id2.i_list.l_tree_depth == cpu_to_le16(1))
  782. goto out_add;
  783. }
  784. /* call ocfs2_add_branch to add the final part of the tree with
  785. * the new data. */
  786. mlog(0, "ocfs2_allocate_extent: add branch. bh = %p\n", bh);
  787. status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh,
  788. meta_ac);
  789. if (status < 0) {
  790. mlog_errno(status);
  791. goto bail;
  792. }
  793. out_add:
  794. /* Finally, we can add clusters. */
  795. status = ocfs2_do_insert_extent(osb, handle, inode, fe_bh,
  796. start_blk, new_clusters);
  797. if (status < 0)
  798. mlog_errno(status);
  799. bail:
  800. if (bh)
  801. brelse(bh);
  802. if (last_eb_bh)
  803. brelse(last_eb_bh);
  804. mlog_exit(status);
  805. return status;
  806. }
  807. static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
  808. {
  809. struct buffer_head *tl_bh = osb->osb_tl_bh;
  810. struct ocfs2_dinode *di;
  811. struct ocfs2_truncate_log *tl;
  812. di = (struct ocfs2_dinode *) tl_bh->b_data;
  813. tl = &di->id2.i_dealloc;
  814. mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count),
  815. "slot %d, invalid truncate log parameters: used = "
  816. "%u, count = %u\n", osb->slot_num,
  817. le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count));
  818. return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
  819. }
  820. static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
  821. unsigned int new_start)
  822. {
  823. unsigned int tail_index;
  824. unsigned int current_tail;
  825. /* No records, nothing to coalesce */
  826. if (!le16_to_cpu(tl->tl_used))
  827. return 0;
  828. tail_index = le16_to_cpu(tl->tl_used) - 1;
  829. current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start);
  830. current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters);
  831. return current_tail == new_start;
  832. }
  833. static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
  834. struct ocfs2_journal_handle *handle,
  835. u64 start_blk,
  836. unsigned int num_clusters)
  837. {
  838. int status, index;
  839. unsigned int start_cluster, tl_count;
  840. struct inode *tl_inode = osb->osb_tl_inode;
  841. struct buffer_head *tl_bh = osb->osb_tl_bh;
  842. struct ocfs2_dinode *di;
  843. struct ocfs2_truncate_log *tl;
  844. mlog_entry("start_blk = %llu, num_clusters = %u\n",
  845. (unsigned long long)start_blk, num_clusters);
  846. BUG_ON(mutex_trylock(&tl_inode->i_mutex));
  847. start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
  848. di = (struct ocfs2_dinode *) tl_bh->b_data;
  849. tl = &di->id2.i_dealloc;
  850. if (!OCFS2_IS_VALID_DINODE(di)) {
  851. OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
  852. status = -EIO;
  853. goto bail;
  854. }
  855. tl_count = le16_to_cpu(tl->tl_count);
  856. mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
  857. tl_count == 0,
  858. "Truncate record count on #%llu invalid "
  859. "wanted %u, actual %u\n",
  860. (unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
  861. ocfs2_truncate_recs_per_inode(osb->sb),
  862. le16_to_cpu(tl->tl_count));
  863. /* Caller should have known to flush before calling us. */
  864. index = le16_to_cpu(tl->tl_used);
  865. if (index >= tl_count) {
  866. status = -ENOSPC;
  867. mlog_errno(status);
  868. goto bail;
  869. }
  870. status = ocfs2_journal_access(handle, tl_inode, tl_bh,
  871. OCFS2_JOURNAL_ACCESS_WRITE);
  872. if (status < 0) {
  873. mlog_errno(status);
  874. goto bail;
  875. }
  876. mlog(0, "Log truncate of %u clusters starting at cluster %u to "
  877. "%llu (index = %d)\n", num_clusters, start_cluster,
  878. (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index);
  879. if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
  880. /*
  881. * Move index back to the record we are coalescing with.
  882. * ocfs2_truncate_log_can_coalesce() guarantees nonzero
  883. */
  884. index--;
  885. num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
  886. mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n",
  887. index, le32_to_cpu(tl->tl_recs[index].t_start),
  888. num_clusters);
  889. } else {
  890. tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
  891. tl->tl_used = cpu_to_le16(index + 1);
  892. }
  893. tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
  894. status = ocfs2_journal_dirty(handle, tl_bh);
  895. if (status < 0) {
  896. mlog_errno(status);
  897. goto bail;
  898. }
  899. bail:
  900. mlog_exit(status);
  901. return status;
  902. }
  903. static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
  904. struct ocfs2_journal_handle *handle,
  905. struct inode *data_alloc_inode,
  906. struct buffer_head *data_alloc_bh)
  907. {
  908. int status = 0;
  909. int i;
  910. unsigned int num_clusters;
  911. u64 start_blk;
  912. struct ocfs2_truncate_rec rec;
  913. struct ocfs2_dinode *di;
  914. struct ocfs2_truncate_log *tl;
  915. struct inode *tl_inode = osb->osb_tl_inode;
  916. struct buffer_head *tl_bh = osb->osb_tl_bh;
  917. mlog_entry_void();
  918. di = (struct ocfs2_dinode *) tl_bh->b_data;
  919. tl = &di->id2.i_dealloc;
  920. i = le16_to_cpu(tl->tl_used) - 1;
  921. while (i >= 0) {
  922. /* Caller has given us at least enough credits to
  923. * update the truncate log dinode */
  924. status = ocfs2_journal_access(handle, tl_inode, tl_bh,
  925. OCFS2_JOURNAL_ACCESS_WRITE);
  926. if (status < 0) {
  927. mlog_errno(status);
  928. goto bail;
  929. }
  930. tl->tl_used = cpu_to_le16(i);
  931. status = ocfs2_journal_dirty(handle, tl_bh);
  932. if (status < 0) {
  933. mlog_errno(status);
  934. goto bail;
  935. }
  936. /* TODO: Perhaps we can calculate the bulk of the
  937. * credits up front rather than extending like
  938. * this. */
  939. status = ocfs2_extend_trans(handle,
  940. OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
  941. if (status < 0) {
  942. mlog_errno(status);
  943. goto bail;
  944. }
  945. rec = tl->tl_recs[i];
  946. start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
  947. le32_to_cpu(rec.t_start));
  948. num_clusters = le32_to_cpu(rec.t_clusters);
  949. /* if start_blk is not set, we ignore the record as
  950. * invalid. */
  951. if (start_blk) {
  952. mlog(0, "free record %d, start = %u, clusters = %u\n",
  953. i, le32_to_cpu(rec.t_start), num_clusters);
  954. status = ocfs2_free_clusters(handle, data_alloc_inode,
  955. data_alloc_bh, start_blk,
  956. num_clusters);
  957. if (status < 0) {
  958. mlog_errno(status);
  959. goto bail;
  960. }
  961. }
  962. i--;
  963. }
  964. bail:
  965. mlog_exit(status);
  966. return status;
  967. }
  968. /* Expects you to already be holding tl_inode->i_mutex */
  969. static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
  970. {
  971. int status;
  972. unsigned int num_to_flush;
  973. struct ocfs2_journal_handle *handle = NULL;
  974. struct inode *tl_inode = osb->osb_tl_inode;
  975. struct inode *data_alloc_inode = NULL;
  976. struct buffer_head *tl_bh = osb->osb_tl_bh;
  977. struct buffer_head *data_alloc_bh = NULL;
  978. struct ocfs2_dinode *di;
  979. struct ocfs2_truncate_log *tl;
  980. mlog_entry_void();
  981. BUG_ON(mutex_trylock(&tl_inode->i_mutex));
  982. di = (struct ocfs2_dinode *) tl_bh->b_data;
  983. tl = &di->id2.i_dealloc;
  984. if (!OCFS2_IS_VALID_DINODE(di)) {
  985. OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
  986. status = -EIO;
  987. goto bail;
  988. }
  989. num_to_flush = le16_to_cpu(tl->tl_used);
  990. mlog(0, "Flush %u records from truncate log #%llu\n",
  991. num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
  992. if (!num_to_flush) {
  993. status = 0;
  994. goto bail;
  995. }
  996. handle = ocfs2_alloc_handle(osb);
  997. if (!handle) {
  998. status = -ENOMEM;
  999. mlog_errno(status);
  1000. goto bail;
  1001. }
  1002. data_alloc_inode = ocfs2_get_system_file_inode(osb,
  1003. GLOBAL_BITMAP_SYSTEM_INODE,
  1004. OCFS2_INVALID_SLOT);
  1005. if (!data_alloc_inode) {
  1006. status = -EINVAL;
  1007. mlog(ML_ERROR, "Could not get bitmap inode!\n");
  1008. goto bail;
  1009. }
  1010. ocfs2_handle_add_inode(handle, data_alloc_inode);
  1011. status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
  1012. if (status < 0) {
  1013. mlog_errno(status);
  1014. goto bail;
  1015. }
  1016. handle = ocfs2_start_trans(osb, handle, OCFS2_TRUNCATE_LOG_UPDATE);
  1017. if (IS_ERR(handle)) {
  1018. status = PTR_ERR(handle);
  1019. handle = NULL;
  1020. mlog_errno(status);
  1021. goto bail;
  1022. }
  1023. status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
  1024. data_alloc_bh);
  1025. if (status < 0) {
  1026. mlog_errno(status);
  1027. goto bail;
  1028. }
  1029. bail:
  1030. if (handle)
  1031. ocfs2_commit_trans(handle);
  1032. if (data_alloc_inode)
  1033. iput(data_alloc_inode);
  1034. if (data_alloc_bh)
  1035. brelse(data_alloc_bh);
  1036. mlog_exit(status);
  1037. return status;
  1038. }
  1039. int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
  1040. {
  1041. int status;
  1042. struct inode *tl_inode = osb->osb_tl_inode;
  1043. mutex_lock(&tl_inode->i_mutex);
  1044. status = __ocfs2_flush_truncate_log(osb);
  1045. mutex_unlock(&tl_inode->i_mutex);
  1046. return status;
  1047. }
  1048. static void ocfs2_truncate_log_worker(void *data)
  1049. {
  1050. int status;
  1051. struct ocfs2_super *osb = data;
  1052. mlog_entry_void();
  1053. status = ocfs2_flush_truncate_log(osb);
  1054. if (status < 0)
  1055. mlog_errno(status);
  1056. mlog_exit(status);
  1057. }
  1058. #define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
  1059. void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
  1060. int cancel)
  1061. {
  1062. if (osb->osb_tl_inode) {
  1063. /* We want to push off log flushes while truncates are
  1064. * still running. */
  1065. if (cancel)
  1066. cancel_delayed_work(&osb->osb_truncate_log_wq);
  1067. queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq,
  1068. OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
  1069. }
  1070. }
  1071. static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
  1072. int slot_num,
  1073. struct inode **tl_inode,
  1074. struct buffer_head **tl_bh)
  1075. {
  1076. int status;
  1077. struct inode *inode = NULL;
  1078. struct buffer_head *bh = NULL;
  1079. inode = ocfs2_get_system_file_inode(osb,
  1080. TRUNCATE_LOG_SYSTEM_INODE,
  1081. slot_num);
  1082. if (!inode) {
  1083. status = -EINVAL;
  1084. mlog(ML_ERROR, "Could not get load truncate log inode!\n");
  1085. goto bail;
  1086. }
  1087. status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
  1088. OCFS2_BH_CACHED, inode);
  1089. if (status < 0) {
  1090. iput(inode);
  1091. mlog_errno(status);
  1092. goto bail;
  1093. }
  1094. *tl_inode = inode;
  1095. *tl_bh = bh;
  1096. bail:
  1097. mlog_exit(status);
  1098. return status;
  1099. }
  1100. /* called during the 1st stage of node recovery. we stamp a clean
  1101. * truncate log and pass back a copy for processing later. if the
  1102. * truncate log does not require processing, a *tl_copy is set to
  1103. * NULL. */
  1104. int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
  1105. int slot_num,
  1106. struct ocfs2_dinode **tl_copy)
  1107. {
  1108. int status;
  1109. struct inode *tl_inode = NULL;
  1110. struct buffer_head *tl_bh = NULL;
  1111. struct ocfs2_dinode *di;
  1112. struct ocfs2_truncate_log *tl;
  1113. *tl_copy = NULL;
  1114. mlog(0, "recover truncate log from slot %d\n", slot_num);
  1115. status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
  1116. if (status < 0) {
  1117. mlog_errno(status);
  1118. goto bail;
  1119. }
  1120. di = (struct ocfs2_dinode *) tl_bh->b_data;
  1121. tl = &di->id2.i_dealloc;
  1122. if (!OCFS2_IS_VALID_DINODE(di)) {
  1123. OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
  1124. status = -EIO;
  1125. goto bail;
  1126. }
  1127. if (le16_to_cpu(tl->tl_used)) {
  1128. mlog(0, "We'll have %u logs to recover\n",
  1129. le16_to_cpu(tl->tl_used));
  1130. *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
  1131. if (!(*tl_copy)) {
  1132. status = -ENOMEM;
  1133. mlog_errno(status);
  1134. goto bail;
  1135. }
  1136. /* Assuming the write-out below goes well, this copy
  1137. * will be passed back to recovery for processing. */
  1138. memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
  1139. /* All we need to do to clear the truncate log is set
  1140. * tl_used. */
  1141. tl->tl_used = 0;
  1142. status = ocfs2_write_block(osb, tl_bh, tl_inode);
  1143. if (status < 0) {
  1144. mlog_errno(status);
  1145. goto bail;
  1146. }
  1147. }
  1148. bail:
  1149. if (tl_inode)
  1150. iput(tl_inode);
  1151. if (tl_bh)
  1152. brelse(tl_bh);
  1153. if (status < 0 && (*tl_copy)) {
  1154. kfree(*tl_copy);
  1155. *tl_copy = NULL;
  1156. }
  1157. mlog_exit(status);
  1158. return status;
  1159. }
  1160. int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
  1161. struct ocfs2_dinode *tl_copy)
  1162. {
  1163. int status = 0;
  1164. int i;
  1165. unsigned int clusters, num_recs, start_cluster;
  1166. u64 start_blk;
  1167. struct ocfs2_journal_handle *handle;
  1168. struct inode *tl_inode = osb->osb_tl_inode;
  1169. struct ocfs2_truncate_log *tl;
  1170. mlog_entry_void();
  1171. if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
  1172. mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
  1173. return -EINVAL;
  1174. }
  1175. tl = &tl_copy->id2.i_dealloc;
  1176. num_recs = le16_to_cpu(tl->tl_used);
  1177. mlog(0, "cleanup %u records from %llu\n", num_recs,
  1178. (unsigned long long)tl_copy->i_blkno);
  1179. mutex_lock(&tl_inode->i_mutex);
  1180. for(i = 0; i < num_recs; i++) {
  1181. if (ocfs2_truncate_log_needs_flush(osb)) {
  1182. status = __ocfs2_flush_truncate_log(osb);
  1183. if (status < 0) {
  1184. mlog_errno(status);
  1185. goto bail_up;
  1186. }
  1187. }
  1188. handle = ocfs2_start_trans(osb, NULL,
  1189. OCFS2_TRUNCATE_LOG_UPDATE);
  1190. if (IS_ERR(handle)) {
  1191. status = PTR_ERR(handle);
  1192. mlog_errno(status);
  1193. goto bail_up;
  1194. }
  1195. clusters = le32_to_cpu(tl->tl_recs[i].t_clusters);
  1196. start_cluster = le32_to_cpu(tl->tl_recs[i].t_start);
  1197. start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster);
  1198. status = ocfs2_truncate_log_append(osb, handle,
  1199. start_blk, clusters);
  1200. ocfs2_commit_trans(handle);
  1201. if (status < 0) {
  1202. mlog_errno(status);
  1203. goto bail_up;
  1204. }
  1205. }
  1206. bail_up:
  1207. mutex_unlock(&tl_inode->i_mutex);
  1208. mlog_exit(status);
  1209. return status;
  1210. }
  1211. void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
  1212. {
  1213. int status;
  1214. struct inode *tl_inode = osb->osb_tl_inode;
  1215. mlog_entry_void();
  1216. if (tl_inode) {
  1217. cancel_delayed_work(&osb->osb_truncate_log_wq);
  1218. flush_workqueue(ocfs2_wq);
  1219. status = ocfs2_flush_truncate_log(osb);
  1220. if (status < 0)
  1221. mlog_errno(status);
  1222. brelse(osb->osb_tl_bh);
  1223. iput(osb->osb_tl_inode);
  1224. }
  1225. mlog_exit_void();
  1226. }
  1227. int ocfs2_truncate_log_init(struct ocfs2_super *osb)
  1228. {
  1229. int status;
  1230. struct inode *tl_inode = NULL;
  1231. struct buffer_head *tl_bh = NULL;
  1232. mlog_entry_void();
  1233. status = ocfs2_get_truncate_log_info(osb,
  1234. osb->slot_num,
  1235. &tl_inode,
  1236. &tl_bh);
  1237. if (status < 0)
  1238. mlog_errno(status);
  1239. /* ocfs2_truncate_log_shutdown keys on the existence of
  1240. * osb->osb_tl_inode so we don't set any of the osb variables
  1241. * until we're sure all is well. */
  1242. INIT_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker, osb);
  1243. osb->osb_tl_bh = tl_bh;
  1244. osb->osb_tl_inode = tl_inode;
  1245. mlog_exit(status);
  1246. return status;
  1247. }
  1248. /* This function will figure out whether the currently last extent
  1249. * block will be deleted, and if it will, what the new last extent
  1250. * block will be so we can update his h_next_leaf_blk field, as well
  1251. * as the dinodes i_last_eb_blk */
  1252. static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
  1253. struct inode *inode,
  1254. struct ocfs2_dinode *fe,
  1255. u32 new_i_clusters,
  1256. struct buffer_head *old_last_eb,
  1257. struct buffer_head **new_last_eb)
  1258. {
  1259. int i, status = 0;
  1260. u64 block = 0;
  1261. struct ocfs2_extent_block *eb;
  1262. struct ocfs2_extent_list *el;
  1263. struct buffer_head *bh = NULL;
  1264. *new_last_eb = NULL;
  1265. if (!OCFS2_IS_VALID_DINODE(fe)) {
  1266. OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
  1267. status = -EIO;
  1268. goto bail;
  1269. }
  1270. /* we have no tree, so of course, no last_eb. */
  1271. if (!fe->id2.i_list.l_tree_depth)
  1272. goto bail;
  1273. /* trunc to zero special case - this makes tree_depth = 0
  1274. * regardless of what it is. */
  1275. if (!new_i_clusters)
  1276. goto bail;
  1277. eb = (struct ocfs2_extent_block *) old_last_eb->b_data;
  1278. el = &(eb->h_list);
  1279. BUG_ON(!el->l_next_free_rec);
  1280. /* Make sure that this guy will actually be empty after we
  1281. * clear away the data. */
  1282. if (le32_to_cpu(el->l_recs[0].e_cpos) < new_i_clusters)
  1283. goto bail;
  1284. /* Ok, at this point, we know that last_eb will definitely
  1285. * change, so lets traverse the tree and find the second to
  1286. * last extent block. */
  1287. el = &(fe->id2.i_list);
  1288. /* go down the tree, */
  1289. do {
  1290. for(i = (le16_to_cpu(el->l_next_free_rec) - 1); i >= 0; i--) {
  1291. if (le32_to_cpu(el->l_recs[i].e_cpos) <
  1292. new_i_clusters) {
  1293. block = le64_to_cpu(el->l_recs[i].e_blkno);
  1294. break;
  1295. }
  1296. }
  1297. BUG_ON(i < 0);
  1298. if (bh) {
  1299. brelse(bh);
  1300. bh = NULL;
  1301. }
  1302. status = ocfs2_read_block(osb, block, &bh, OCFS2_BH_CACHED,
  1303. inode);
  1304. if (status < 0) {
  1305. mlog_errno(status);
  1306. goto bail;
  1307. }
  1308. eb = (struct ocfs2_extent_block *) bh->b_data;
  1309. el = &eb->h_list;
  1310. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  1311. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
  1312. status = -EIO;
  1313. goto bail;
  1314. }
  1315. } while (el->l_tree_depth);
  1316. *new_last_eb = bh;
  1317. get_bh(*new_last_eb);
  1318. mlog(0, "returning block %llu\n",
  1319. (unsigned long long)le64_to_cpu(eb->h_blkno));
  1320. bail:
  1321. if (bh)
  1322. brelse(bh);
  1323. return status;
  1324. }
  1325. static int ocfs2_do_truncate(struct ocfs2_super *osb,
  1326. unsigned int clusters_to_del,
  1327. struct inode *inode,
  1328. struct buffer_head *fe_bh,
  1329. struct buffer_head *old_last_eb_bh,
  1330. struct ocfs2_journal_handle *handle,
  1331. struct ocfs2_truncate_context *tc)
  1332. {
  1333. int status, i, depth;
  1334. struct ocfs2_dinode *fe;
  1335. struct ocfs2_extent_block *eb;
  1336. struct ocfs2_extent_block *last_eb = NULL;
  1337. struct ocfs2_extent_list *el;
  1338. struct buffer_head *eb_bh = NULL;
  1339. struct buffer_head *last_eb_bh = NULL;
  1340. u64 next_eb = 0;
  1341. u64 delete_blk = 0;
  1342. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  1343. status = ocfs2_find_new_last_ext_blk(osb,
  1344. inode,
  1345. fe,
  1346. le32_to_cpu(fe->i_clusters) -
  1347. clusters_to_del,
  1348. old_last_eb_bh,
  1349. &last_eb_bh);
  1350. if (status < 0) {
  1351. mlog_errno(status);
  1352. goto bail;
  1353. }
  1354. if (last_eb_bh)
  1355. last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
  1356. status = ocfs2_journal_access(handle, inode, fe_bh,
  1357. OCFS2_JOURNAL_ACCESS_WRITE);
  1358. if (status < 0) {
  1359. mlog_errno(status);
  1360. goto bail;
  1361. }
  1362. el = &(fe->id2.i_list);
  1363. spin_lock(&OCFS2_I(inode)->ip_lock);
  1364. OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
  1365. clusters_to_del;
  1366. spin_unlock(&OCFS2_I(inode)->ip_lock);
  1367. le32_add_cpu(&fe->i_clusters, -clusters_to_del);
  1368. fe->i_mtime = cpu_to_le64(CURRENT_TIME.tv_sec);
  1369. fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec);
  1370. i = le16_to_cpu(el->l_next_free_rec) - 1;
  1371. BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
  1372. le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
  1373. /* tree depth zero, we can just delete the clusters, otherwise
  1374. * we need to record the offset of the next level extent block
  1375. * as we may overwrite it. */
  1376. if (!el->l_tree_depth)
  1377. delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
  1378. + ocfs2_clusters_to_blocks(osb->sb,
  1379. le32_to_cpu(el->l_recs[i].e_clusters));
  1380. else
  1381. next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
  1382. if (!el->l_recs[i].e_clusters) {
  1383. /* if we deleted the whole extent record, then clear
  1384. * out the other fields and update the extent
  1385. * list. For depth > 0 trees, we've already recorded
  1386. * the extent block in 'next_eb' */
  1387. el->l_recs[i].e_cpos = 0;
  1388. el->l_recs[i].e_blkno = 0;
  1389. BUG_ON(!el->l_next_free_rec);
  1390. le16_add_cpu(&el->l_next_free_rec, -1);
  1391. }
  1392. depth = le16_to_cpu(el->l_tree_depth);
  1393. if (!fe->i_clusters) {
  1394. /* trunc to zero is a special case. */
  1395. el->l_tree_depth = 0;
  1396. fe->i_last_eb_blk = 0;
  1397. } else if (last_eb)
  1398. fe->i_last_eb_blk = last_eb->h_blkno;
  1399. status = ocfs2_journal_dirty(handle, fe_bh);
  1400. if (status < 0) {
  1401. mlog_errno(status);
  1402. goto bail;
  1403. }
  1404. if (last_eb) {
  1405. /* If there will be a new last extent block, then by
  1406. * definition, there cannot be any leaves to the right of
  1407. * him. */
  1408. status = ocfs2_journal_access(handle, inode, last_eb_bh,
  1409. OCFS2_JOURNAL_ACCESS_WRITE);
  1410. if (status < 0) {
  1411. mlog_errno(status);
  1412. goto bail;
  1413. }
  1414. last_eb->h_next_leaf_blk = 0;
  1415. status = ocfs2_journal_dirty(handle, last_eb_bh);
  1416. if (status < 0) {
  1417. mlog_errno(status);
  1418. goto bail;
  1419. }
  1420. }
  1421. /* if our tree depth > 0, update all the tree blocks below us. */
  1422. while (depth) {
  1423. mlog(0, "traveling tree (depth = %d, next_eb = %llu)\n",
  1424. depth, (unsigned long long)next_eb);
  1425. status = ocfs2_read_block(osb, next_eb, &eb_bh,
  1426. OCFS2_BH_CACHED, inode);
  1427. if (status < 0) {
  1428. mlog_errno(status);
  1429. goto bail;
  1430. }
  1431. eb = (struct ocfs2_extent_block *)eb_bh->b_data;
  1432. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  1433. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
  1434. status = -EIO;
  1435. goto bail;
  1436. }
  1437. el = &(eb->h_list);
  1438. status = ocfs2_journal_access(handle, inode, eb_bh,
  1439. OCFS2_JOURNAL_ACCESS_WRITE);
  1440. if (status < 0) {
  1441. mlog_errno(status);
  1442. goto bail;
  1443. }
  1444. BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
  1445. BUG_ON(depth != (le16_to_cpu(el->l_tree_depth) + 1));
  1446. i = le16_to_cpu(el->l_next_free_rec) - 1;
  1447. mlog(0, "extent block %llu, before: record %d: "
  1448. "(%u, %u, %llu), next = %u\n",
  1449. (unsigned long long)le64_to_cpu(eb->h_blkno), i,
  1450. le32_to_cpu(el->l_recs[i].e_cpos),
  1451. le32_to_cpu(el->l_recs[i].e_clusters),
  1452. (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
  1453. le16_to_cpu(el->l_next_free_rec));
  1454. BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
  1455. le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
  1456. next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
  1457. /* bottom-most block requires us to delete data.*/
  1458. if (!el->l_tree_depth)
  1459. delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
  1460. + ocfs2_clusters_to_blocks(osb->sb,
  1461. le32_to_cpu(el->l_recs[i].e_clusters));
  1462. if (!el->l_recs[i].e_clusters) {
  1463. el->l_recs[i].e_cpos = 0;
  1464. el->l_recs[i].e_blkno = 0;
  1465. BUG_ON(!el->l_next_free_rec);
  1466. le16_add_cpu(&el->l_next_free_rec, -1);
  1467. }
  1468. mlog(0, "extent block %llu, after: record %d: "
  1469. "(%u, %u, %llu), next = %u\n",
  1470. (unsigned long long)le64_to_cpu(eb->h_blkno), i,
  1471. le32_to_cpu(el->l_recs[i].e_cpos),
  1472. le32_to_cpu(el->l_recs[i].e_clusters),
  1473. (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
  1474. le16_to_cpu(el->l_next_free_rec));
  1475. status = ocfs2_journal_dirty(handle, eb_bh);
  1476. if (status < 0) {
  1477. mlog_errno(status);
  1478. goto bail;
  1479. }
  1480. if (!el->l_next_free_rec) {
  1481. mlog(0, "deleting this extent block.\n");
  1482. ocfs2_remove_from_cache(inode, eb_bh);
  1483. BUG_ON(eb->h_suballoc_slot);
  1484. BUG_ON(el->l_recs[0].e_clusters);
  1485. BUG_ON(el->l_recs[0].e_cpos);
  1486. BUG_ON(el->l_recs[0].e_blkno);
  1487. status = ocfs2_free_extent_block(handle,
  1488. tc->tc_ext_alloc_inode,
  1489. tc->tc_ext_alloc_bh,
  1490. eb);
  1491. if (status < 0) {
  1492. mlog_errno(status);
  1493. goto bail;
  1494. }
  1495. }
  1496. brelse(eb_bh);
  1497. eb_bh = NULL;
  1498. depth--;
  1499. }
  1500. BUG_ON(!delete_blk);
  1501. status = ocfs2_truncate_log_append(osb, handle, delete_blk,
  1502. clusters_to_del);
  1503. if (status < 0) {
  1504. mlog_errno(status);
  1505. goto bail;
  1506. }
  1507. status = 0;
  1508. bail:
  1509. if (!status)
  1510. ocfs2_extent_map_trunc(inode, le32_to_cpu(fe->i_clusters));
  1511. else
  1512. ocfs2_extent_map_drop(inode, 0);
  1513. mlog_exit(status);
  1514. return status;
  1515. }
  1516. /*
  1517. * It is expected, that by the time you call this function,
  1518. * inode->i_size and fe->i_size have been adjusted.
  1519. *
  1520. * WARNING: This will kfree the truncate context
  1521. */
  1522. int ocfs2_commit_truncate(struct ocfs2_super *osb,
  1523. struct inode *inode,
  1524. struct buffer_head *fe_bh,
  1525. struct ocfs2_truncate_context *tc)
  1526. {
  1527. int status, i, credits, tl_sem = 0;
  1528. u32 clusters_to_del, target_i_clusters;
  1529. u64 last_eb = 0;
  1530. struct ocfs2_dinode *fe;
  1531. struct ocfs2_extent_block *eb;
  1532. struct ocfs2_extent_list *el;
  1533. struct buffer_head *last_eb_bh;
  1534. struct ocfs2_journal_handle *handle = NULL;
  1535. struct inode *tl_inode = osb->osb_tl_inode;
  1536. mlog_entry_void();
  1537. down_write(&OCFS2_I(inode)->ip_alloc_sem);
  1538. target_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
  1539. i_size_read(inode));
  1540. last_eb_bh = tc->tc_last_eb_bh;
  1541. tc->tc_last_eb_bh = NULL;
  1542. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  1543. if (fe->id2.i_list.l_tree_depth) {
  1544. eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
  1545. el = &eb->h_list;
  1546. } else
  1547. el = &fe->id2.i_list;
  1548. last_eb = le64_to_cpu(fe->i_last_eb_blk);
  1549. start:
  1550. mlog(0, "ocfs2_commit_truncate: fe->i_clusters = %u, "
  1551. "last_eb = %llu, fe->i_last_eb_blk = %llu, "
  1552. "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n",
  1553. le32_to_cpu(fe->i_clusters), (unsigned long long)last_eb,
  1554. (unsigned long long)le64_to_cpu(fe->i_last_eb_blk),
  1555. le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh);
  1556. if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) {
  1557. mlog(0, "last_eb changed!\n");
  1558. BUG_ON(!fe->id2.i_list.l_tree_depth);
  1559. last_eb = le64_to_cpu(fe->i_last_eb_blk);
  1560. /* i_last_eb_blk may have changed, read it if
  1561. * necessary. We don't have to worry about the
  1562. * truncate to zero case here (where there becomes no
  1563. * last_eb) because we never loop back after our work
  1564. * is done. */
  1565. if (last_eb_bh) {
  1566. brelse(last_eb_bh);
  1567. last_eb_bh = NULL;
  1568. }
  1569. status = ocfs2_read_block(osb, last_eb,
  1570. &last_eb_bh, OCFS2_BH_CACHED,
  1571. inode);
  1572. if (status < 0) {
  1573. mlog_errno(status);
  1574. goto bail;
  1575. }
  1576. eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
  1577. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  1578. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
  1579. status = -EIO;
  1580. goto bail;
  1581. }
  1582. el = &(eb->h_list);
  1583. }
  1584. /* by now, el will point to the extent list on the bottom most
  1585. * portion of this tree. */
  1586. i = le16_to_cpu(el->l_next_free_rec) - 1;
  1587. if (le32_to_cpu(el->l_recs[i].e_cpos) >= target_i_clusters)
  1588. clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters);
  1589. else
  1590. clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) +
  1591. le32_to_cpu(el->l_recs[i].e_cpos)) -
  1592. target_i_clusters;
  1593. mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);
  1594. mutex_lock(&tl_inode->i_mutex);
  1595. tl_sem = 1;
  1596. /* ocfs2_truncate_log_needs_flush guarantees us at least one
  1597. * record is free for use. If there isn't any, we flush to get
  1598. * an empty truncate log. */
  1599. if (ocfs2_truncate_log_needs_flush(osb)) {
  1600. status = __ocfs2_flush_truncate_log(osb);
  1601. if (status < 0) {
  1602. mlog_errno(status);
  1603. goto bail;
  1604. }
  1605. }
  1606. credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
  1607. fe, el);
  1608. handle = ocfs2_start_trans(osb, NULL, credits);
  1609. if (IS_ERR(handle)) {
  1610. status = PTR_ERR(handle);
  1611. handle = NULL;
  1612. mlog_errno(status);
  1613. goto bail;
  1614. }
  1615. inode->i_ctime = inode->i_mtime = CURRENT_TIME;
  1616. status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
  1617. if (status < 0)
  1618. mlog_errno(status);
  1619. status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh,
  1620. last_eb_bh, handle, tc);
  1621. if (status < 0) {
  1622. mlog_errno(status);
  1623. goto bail;
  1624. }
  1625. mutex_unlock(&tl_inode->i_mutex);
  1626. tl_sem = 0;
  1627. ocfs2_commit_trans(handle);
  1628. handle = NULL;
  1629. BUG_ON(le32_to_cpu(fe->i_clusters) < target_i_clusters);
  1630. if (le32_to_cpu(fe->i_clusters) > target_i_clusters)
  1631. goto start;
  1632. bail:
  1633. up_write(&OCFS2_I(inode)->ip_alloc_sem);
  1634. ocfs2_schedule_truncate_log_flush(osb, 1);
  1635. if (tl_sem)
  1636. mutex_unlock(&tl_inode->i_mutex);
  1637. if (handle)
  1638. ocfs2_commit_trans(handle);
  1639. if (last_eb_bh)
  1640. brelse(last_eb_bh);
  1641. /* This will drop the ext_alloc cluster lock for us */
  1642. ocfs2_free_truncate_context(tc);
  1643. mlog_exit(status);
  1644. return status;
  1645. }
  1646. /*
  1647. * Expects the inode to already be locked. This will figure out which
  1648. * inodes need to be locked and will put them on the returned truncate
  1649. * context.
  1650. */
  1651. int ocfs2_prepare_truncate(struct ocfs2_super *osb,
  1652. struct inode *inode,
  1653. struct buffer_head *fe_bh,
  1654. struct ocfs2_truncate_context **tc)
  1655. {
  1656. int status, metadata_delete;
  1657. unsigned int new_i_clusters;
  1658. struct ocfs2_dinode *fe;
  1659. struct ocfs2_extent_block *eb;
  1660. struct ocfs2_extent_list *el;
  1661. struct buffer_head *last_eb_bh = NULL;
  1662. struct inode *ext_alloc_inode = NULL;
  1663. struct buffer_head *ext_alloc_bh = NULL;
  1664. mlog_entry_void();
  1665. *tc = NULL;
  1666. new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
  1667. i_size_read(inode));
  1668. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  1669. mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
  1670. "%llu\n", fe->i_clusters, new_i_clusters,
  1671. (unsigned long long)fe->i_size);
  1672. if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) {
  1673. ocfs2_error(inode->i_sb, "Dinode %llu has cluster count "
  1674. "%u and size %llu whereas struct inode has "
  1675. "cluster count %u and size %llu which caused an "
  1676. "invalid truncate to %u clusters.",
  1677. (unsigned long long)le64_to_cpu(fe->i_blkno),
  1678. le32_to_cpu(fe->i_clusters),
  1679. (unsigned long long)le64_to_cpu(fe->i_size),
  1680. OCFS2_I(inode)->ip_clusters, i_size_read(inode),
  1681. new_i_clusters);
  1682. mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres);
  1683. status = -EIO;
  1684. goto bail;
  1685. }
  1686. *tc = kcalloc(1, sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
  1687. if (!(*tc)) {
  1688. status = -ENOMEM;
  1689. mlog_errno(status);
  1690. goto bail;
  1691. }
  1692. metadata_delete = 0;
  1693. if (fe->id2.i_list.l_tree_depth) {
  1694. /* If we have a tree, then the truncate may result in
  1695. * metadata deletes. Figure this out from the
  1696. * rightmost leaf block.*/
  1697. status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
  1698. &last_eb_bh, OCFS2_BH_CACHED, inode);
  1699. if (status < 0) {
  1700. mlog_errno(status);
  1701. goto bail;
  1702. }
  1703. eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
  1704. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  1705. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
  1706. brelse(last_eb_bh);
  1707. status = -EIO;
  1708. goto bail;
  1709. }
  1710. el = &(eb->h_list);
  1711. if (le32_to_cpu(el->l_recs[0].e_cpos) >= new_i_clusters)
  1712. metadata_delete = 1;
  1713. }
  1714. (*tc)->tc_last_eb_bh = last_eb_bh;
  1715. if (metadata_delete) {
  1716. mlog(0, "Will have to delete metadata for this trunc. "
  1717. "locking allocator.\n");
  1718. ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
  1719. if (!ext_alloc_inode) {
  1720. status = -ENOMEM;
  1721. mlog_errno(status);
  1722. goto bail;
  1723. }
  1724. mutex_lock(&ext_alloc_inode->i_mutex);
  1725. (*tc)->tc_ext_alloc_inode = ext_alloc_inode;
  1726. status = ocfs2_meta_lock(ext_alloc_inode,
  1727. NULL,
  1728. &ext_alloc_bh,
  1729. 1);
  1730. if (status < 0) {
  1731. mlog_errno(status);
  1732. goto bail;
  1733. }
  1734. (*tc)->tc_ext_alloc_bh = ext_alloc_bh;
  1735. (*tc)->tc_ext_alloc_locked = 1;
  1736. }
  1737. status = 0;
  1738. bail:
  1739. if (status < 0) {
  1740. if (*tc)
  1741. ocfs2_free_truncate_context(*tc);
  1742. *tc = NULL;
  1743. }
  1744. mlog_exit_void();
  1745. return status;
  1746. }
  1747. static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
  1748. {
  1749. if (tc->tc_ext_alloc_inode) {
  1750. if (tc->tc_ext_alloc_locked)
  1751. ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
  1752. mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex);
  1753. iput(tc->tc_ext_alloc_inode);
  1754. }
  1755. if (tc->tc_ext_alloc_bh)
  1756. brelse(tc->tc_ext_alloc_bh);
  1757. if (tc->tc_last_eb_bh)
  1758. brelse(tc->tc_last_eb_bh);
  1759. kfree(tc);
  1760. }