alloc.c 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * alloc.c
  5. *
  6. * Extent allocs and frees
  7. *
  8. * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2 of the License, or (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public
  21. * License along with this program; if not, write to the
  22. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23. * Boston, MA 021110-1307, USA.
  24. */
  25. #include <linux/fs.h>
  26. #include <linux/types.h>
  27. #include <linux/slab.h>
  28. #include <linux/highmem.h>
  29. #define MLOG_MASK_PREFIX ML_DISK_ALLOC
  30. #include <cluster/masklog.h>
  31. #include "ocfs2.h"
  32. #include "alloc.h"
  33. #include "dlmglue.h"
  34. #include "extent_map.h"
  35. #include "inode.h"
  36. #include "journal.h"
  37. #include "localalloc.h"
  38. #include "suballoc.h"
  39. #include "sysfile.h"
  40. #include "file.h"
  41. #include "super.h"
  42. #include "uptodate.h"
  43. #include "buffer_head_io.h"
  44. static int ocfs2_extent_contig(struct inode *inode,
  45. struct ocfs2_extent_rec *ext,
  46. u64 blkno);
  47. static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
  48. struct ocfs2_journal_handle *handle,
  49. struct inode *inode,
  50. int wanted,
  51. struct ocfs2_alloc_context *meta_ac,
  52. struct buffer_head *bhs[]);
  53. static int ocfs2_add_branch(struct ocfs2_super *osb,
  54. struct ocfs2_journal_handle *handle,
  55. struct inode *inode,
  56. struct buffer_head *fe_bh,
  57. struct buffer_head *eb_bh,
  58. struct buffer_head *last_eb_bh,
  59. struct ocfs2_alloc_context *meta_ac);
  60. static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
  61. struct ocfs2_journal_handle *handle,
  62. struct inode *inode,
  63. struct buffer_head *fe_bh,
  64. struct ocfs2_alloc_context *meta_ac,
  65. struct buffer_head **ret_new_eb_bh);
  66. static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
  67. struct ocfs2_journal_handle *handle,
  68. struct inode *inode,
  69. struct buffer_head *fe_bh,
  70. u64 blkno,
  71. u32 new_clusters);
  72. static int ocfs2_find_branch_target(struct ocfs2_super *osb,
  73. struct inode *inode,
  74. struct buffer_head *fe_bh,
  75. struct buffer_head **target_bh);
  76. static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
  77. struct inode *inode,
  78. struct ocfs2_dinode *fe,
  79. unsigned int new_i_clusters,
  80. struct buffer_head *old_last_eb,
  81. struct buffer_head **new_last_eb);
  82. static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
  83. static int ocfs2_extent_contig(struct inode *inode,
  84. struct ocfs2_extent_rec *ext,
  85. u64 blkno)
  86. {
  87. return blkno == (le64_to_cpu(ext->e_blkno) +
  88. ocfs2_clusters_to_blocks(inode->i_sb,
  89. le32_to_cpu(ext->e_clusters)));
  90. }
  91. /*
  92. * How many free extents have we got before we need more meta data?
  93. */
  94. int ocfs2_num_free_extents(struct ocfs2_super *osb,
  95. struct inode *inode,
  96. struct ocfs2_dinode *fe)
  97. {
  98. int retval;
  99. struct ocfs2_extent_list *el;
  100. struct ocfs2_extent_block *eb;
  101. struct buffer_head *eb_bh = NULL;
  102. mlog_entry_void();
  103. if (!OCFS2_IS_VALID_DINODE(fe)) {
  104. OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
  105. retval = -EIO;
  106. goto bail;
  107. }
  108. if (fe->i_last_eb_blk) {
  109. retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
  110. &eb_bh, OCFS2_BH_CACHED, inode);
  111. if (retval < 0) {
  112. mlog_errno(retval);
  113. goto bail;
  114. }
  115. eb = (struct ocfs2_extent_block *) eb_bh->b_data;
  116. el = &eb->h_list;
  117. } else
  118. el = &fe->id2.i_list;
  119. BUG_ON(el->l_tree_depth != 0);
  120. retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
  121. bail:
  122. if (eb_bh)
  123. brelse(eb_bh);
  124. mlog_exit(retval);
  125. return retval;
  126. }
/* Allocate @wanted new metadata blocks for @inode from @meta_ac and
 * return them in @bhs, uptodate and reserved in the journal for create.
 *
 * Expects the @bhs array to already be allocated.
 *
 * Sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
 * l_count for you; the rest of each block is zeroed.  On error, every
 * buffer obtained so far is released and the array entries are reset
 * to NULL.
 */
static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
				     struct ocfs2_journal_handle *handle,
				     struct inode *inode,
				     int wanted,
				     struct ocfs2_alloc_context *meta_ac,
				     struct buffer_head *bhs[])
{
	int count, status, i;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 first_blkno;
	struct ocfs2_extent_block *eb;

	mlog_entry_void();

	count = 0;
	while (count < wanted) {
		/* The suballocator may satisfy the request as several
		 * smaller contiguous runs, so loop until we have them all. */
		status = ocfs2_claim_metadata(osb,
					      handle,
					      meta_ac,
					      wanted - count,
					      &suballoc_bit_start,
					      &num_got,
					      &first_blkno);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		for(i = count; i < (num_got + count); i++) {
			bhs[i] = sb_getblk(osb->sb, first_blkno);
			if (bhs[i] == NULL) {
				status = -EIO;
				mlog_errno(status);
				goto bail;
			}
			ocfs2_set_new_buffer_uptodate(inode, bhs[i]);

			/* Reserve the journal write before touching the
			 * buffer contents below. */
			status = ocfs2_journal_access(handle, inode, bhs[i],
						      OCFS2_JOURNAL_ACCESS_CREATE);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}

			memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
			eb = (struct ocfs2_extent_block *) bhs[i]->b_data;
			/* Ok, setup the minimal stuff here. */
			strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
			eb->h_blkno = cpu_to_le64(first_blkno);
			eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
			/* we always use slot zero's suballocator */
			eb->h_suballoc_slot = 0;
#else
			eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
#endif
			eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
			eb->h_list.l_count =
				cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));

			/* The claimed run is contiguous in both the
			 * bitmap and on disk, so just step forward. */
			suballoc_bit_start++;
			first_blkno++;

			/* We'll also be dirtied by the caller, so
			 * this isn't absolutely necessary. */
			status = ocfs2_journal_dirty(handle, bhs[i]);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}
		}

		count += num_got;
	}

	status = 0;
bail:
	if (status < 0) {
		/* Undo: release everything we got and leave the array
		 * entries NULL for the caller. */
		for(i = 0; i < wanted; i++) {
			if (bhs[i])
				brelse(bhs[i]);
			bhs[i] = NULL;
		}
	}
	mlog_exit(status);
	return status;
}
/*
 * Add an entire tree branch to our inode. eb_bh is the extent block
 * to start at, if we don't want to start the branch at the dinode
 * structure.
 *
 * last_eb_bh is required as we have to update its next_leaf pointer
 * for the new last extent block.
 *
 * the new branch will be 'empty' in the sense that every block will
 * contain a single record with e_clusters == 0.
 */
static int ocfs2_add_branch(struct ocfs2_super *osb,
			    struct ocfs2_journal_handle *handle,
			    struct inode *inode,
			    struct buffer_head *fe_bh,
			    struct buffer_head *eb_bh,
			    struct buffer_head *last_eb_bh,
			    struct ocfs2_alloc_context *meta_ac)
{
	int status, new_blocks, i;
	u64 next_blkno, new_last_eb_blk;
	struct buffer_head *bh;
	struct buffer_head **new_eb_bhs = NULL;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *eb_el;
	struct ocfs2_extent_list *el;

	mlog_entry_void();

	BUG_ON(!last_eb_bh);

	fe = (struct ocfs2_dinode *) fe_bh->b_data;

	if (eb_bh) {
		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;
	} else
		el = &fe->id2.i_list;

	/* we never add a branch to a leaf. */
	BUG_ON(!el->l_tree_depth);

	/* One new extent block per level below the branch point. */
	new_blocks = le16_to_cpu(el->l_tree_depth);

	/* allocate the number of new eb blocks we need */
	new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
			     GFP_KERNEL);
	if (!new_eb_bhs) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks,
					   meta_ac, new_eb_bhs);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
	 * linked with the rest of the tree.
	 * conversly, new_eb_bhs[0] is the new bottommost leaf.
	 *
	 * when we leave the loop, new_last_eb_blk will point to the
	 * newest leaf, and next_blkno will point to the topmost extent
	 * block. */
	next_blkno = new_last_eb_blk = 0;
	for(i = 0; i < new_blocks; i++) {
		bh = new_eb_bhs[i];
		eb = (struct ocfs2_extent_block *) bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			status = -EIO;
			goto bail;
		}
		eb_el = &eb->h_list;

		status = ocfs2_journal_access(handle, inode, bh,
					      OCFS2_JOURNAL_ACCESS_CREATE);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		/* Build this level: depth i, one empty record pointing
		 * at the block created by the previous iteration. */
		eb->h_next_leaf_blk = 0;
		eb_el->l_tree_depth = cpu_to_le16(i);
		eb_el->l_next_free_rec = cpu_to_le16(1);
		eb_el->l_recs[0].e_cpos = fe->i_clusters;
		eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno);
		eb_el->l_recs[0].e_clusters = cpu_to_le32(0);
		/* Depth zero means this one is the new last leaf. */
		if (!eb_el->l_tree_depth)
			new_last_eb_blk = le64_to_cpu(eb->h_blkno);

		status = ocfs2_journal_dirty(handle, bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		next_blkno = le64_to_cpu(eb->h_blkno);
	}

	/* This is a bit hairy. We want to update up to three blocks
	 * here without leaving any of them in an inconsistent state
	 * in case of error. We don't have to worry about
	 * journal_dirty erroring as it won't unless we've aborted the
	 * handle (in which case we would never be here) so reserving
	 * the write with journal_access is all we need to do. */
	status = ocfs2_journal_access(handle, inode, last_eb_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	status = ocfs2_journal_access(handle, inode, fe_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (eb_bh) {
		status = ocfs2_journal_access(handle, inode, eb_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	/* Link the new branch into the rest of the tree (el will
	 * either be on the fe, or the extent block passed in. */
	i = le16_to_cpu(el->l_next_free_rec);
	el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
	el->l_recs[i].e_cpos = fe->i_clusters;
	el->l_recs[i].e_clusters = 0;
	le16_add_cpu(&el->l_next_free_rec, 1);

	/* fe needs a new last extent block pointer, as does the
	 * next_leaf on the previously last-extent-block. */
	fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);

	eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
	eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);

	/* All three writes were reserved above, so journal_dirty
	 * failures here cannot leave a partial update on disk. */
	status = ocfs2_journal_dirty(handle, last_eb_bh);
	if (status < 0)
		mlog_errno(status);
	status = ocfs2_journal_dirty(handle, fe_bh);
	if (status < 0)
		mlog_errno(status);
	if (eb_bh) {
		status = ocfs2_journal_dirty(handle, eb_bh);
		if (status < 0)
			mlog_errno(status);
	}

	status = 0;
bail:
	if (new_eb_bhs) {
		for (i = 0; i < new_blocks; i++)
			if (new_eb_bhs[i])
				brelse(new_eb_bhs[i]);
		kfree(new_eb_bhs);
	}

	mlog_exit(status);
	return status;
}
/*
 * adds another level to the allocation tree.
 * returns back the new extent block (via *ret_new_eb_bh) so you can
 * add a branch to it after this call.
 *
 * The dinode's current extent list is copied wholesale into the new
 * block, and the dinode is left with a single record pointing at it.
 */
static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
				  struct ocfs2_journal_handle *handle,
				  struct inode *inode,
				  struct buffer_head *fe_bh,
				  struct ocfs2_alloc_context *meta_ac,
				  struct buffer_head **ret_new_eb_bh)
{
	int status, i;
	struct buffer_head *new_eb_bh = NULL;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *fe_el;
	struct ocfs2_extent_list *eb_el;

	mlog_entry_void();

	status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac,
					   &new_eb_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
		OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
		status = -EIO;
		goto bail;
	}

	eb_el = &eb->h_list;
	fe = (struct ocfs2_dinode *) fe_bh->b_data;
	fe_el = &fe->id2.i_list;

	status = ocfs2_journal_access(handle, inode, new_eb_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* copy the fe data into the new extent block */
	/* Both sides are little-endian on disk, so fields are copied
	 * without byte-swapping. */
	eb_el->l_tree_depth = fe_el->l_tree_depth;
	eb_el->l_next_free_rec = fe_el->l_next_free_rec;
	for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
		eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos;
		eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters;
		eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno;
	}

	status = ocfs2_journal_dirty(handle, new_eb_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* The new block is safely on disk before the dinode starts
	 * pointing at it. */
	status = ocfs2_journal_access(handle, inode, fe_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* update fe now */
	le16_add_cpu(&fe_el->l_tree_depth, 1);
	fe_el->l_recs[0].e_cpos = 0;
	fe_el->l_recs[0].e_blkno = eb->h_blkno;
	fe_el->l_recs[0].e_clusters = fe->i_clusters;
	/* Clear out the now-unused records; record 0 covers everything. */
	for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
		fe_el->l_recs[i].e_cpos = 0;
		fe_el->l_recs[i].e_clusters = 0;
		fe_el->l_recs[i].e_blkno = 0;
	}
	fe_el->l_next_free_rec = cpu_to_le16(1);

	/* If this is our 1st tree depth shift, then last_eb_blk
	 * becomes the allocated extent block */
	if (fe_el->l_tree_depth == cpu_to_le16(1))
		fe->i_last_eb_blk = eb->h_blkno;

	status = ocfs2_journal_dirty(handle, fe_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* Transfer ownership of the buffer to the caller. */
	*ret_new_eb_bh = new_eb_bh;
	new_eb_bh = NULL;
	status = 0;
bail:
	if (new_eb_bh)
		brelse(new_eb_bh);

	mlog_exit(status);
	return status;
}
  449. /*
  450. * Expects the tree to already have room in the rightmost leaf for the
  451. * extent. Updates all the extent blocks (and the dinode) on the way
  452. * down.
  453. */
  454. static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
  455. struct ocfs2_journal_handle *handle,
  456. struct inode *inode,
  457. struct buffer_head *fe_bh,
  458. u64 start_blk,
  459. u32 new_clusters)
  460. {
  461. int status, i, num_bhs = 0;
  462. u64 next_blkno;
  463. u16 next_free;
  464. struct buffer_head **eb_bhs = NULL;
  465. struct ocfs2_dinode *fe;
  466. struct ocfs2_extent_block *eb;
  467. struct ocfs2_extent_list *el;
  468. mlog_entry_void();
  469. status = ocfs2_journal_access(handle, inode, fe_bh,
  470. OCFS2_JOURNAL_ACCESS_WRITE);
  471. if (status < 0) {
  472. mlog_errno(status);
  473. goto bail;
  474. }
  475. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  476. el = &fe->id2.i_list;
  477. if (el->l_tree_depth) {
  478. /* This is another operation where we want to be
  479. * careful about our tree updates. An error here means
  480. * none of the previous changes we made should roll
  481. * forward. As a result, we have to record the buffers
  482. * for this part of the tree in an array and reserve a
  483. * journal write to them before making any changes. */
  484. num_bhs = le16_to_cpu(fe->id2.i_list.l_tree_depth);
  485. eb_bhs = kcalloc(num_bhs, sizeof(struct buffer_head *),
  486. GFP_KERNEL);
  487. if (!eb_bhs) {
  488. status = -ENOMEM;
  489. mlog_errno(status);
  490. goto bail;
  491. }
  492. i = 0;
  493. while(el->l_tree_depth) {
  494. next_free = le16_to_cpu(el->l_next_free_rec);
  495. if (next_free == 0) {
  496. ocfs2_error(inode->i_sb,
  497. "Dinode %"MLFu64" has a bad "
  498. "extent list",
  499. OCFS2_I(inode)->ip_blkno);
  500. status = -EIO;
  501. goto bail;
  502. }
  503. next_blkno = le64_to_cpu(el->l_recs[next_free - 1].e_blkno);
  504. BUG_ON(i >= num_bhs);
  505. status = ocfs2_read_block(osb, next_blkno, &eb_bhs[i],
  506. OCFS2_BH_CACHED, inode);
  507. if (status < 0) {
  508. mlog_errno(status);
  509. goto bail;
  510. }
  511. eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
  512. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  513. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
  514. eb);
  515. status = -EIO;
  516. goto bail;
  517. }
  518. status = ocfs2_journal_access(handle, inode, eb_bhs[i],
  519. OCFS2_JOURNAL_ACCESS_WRITE);
  520. if (status < 0) {
  521. mlog_errno(status);
  522. goto bail;
  523. }
  524. el = &eb->h_list;
  525. i++;
  526. /* When we leave this loop, eb_bhs[num_bhs - 1] will
  527. * hold the bottom-most leaf extent block. */
  528. }
  529. BUG_ON(el->l_tree_depth);
  530. el = &fe->id2.i_list;
  531. /* If we have tree depth, then the fe update is
  532. * trivial, and we want to switch el out for the
  533. * bottom-most leaf in order to update it with the
  534. * actual extent data below. */
  535. next_free = le16_to_cpu(el->l_next_free_rec);
  536. if (next_free == 0) {
  537. ocfs2_error(inode->i_sb,
  538. "Dinode %"MLFu64" has a bad "
  539. "extent list",
  540. OCFS2_I(inode)->ip_blkno);
  541. status = -EIO;
  542. goto bail;
  543. }
  544. le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
  545. new_clusters);
  546. /* (num_bhs - 1) to avoid the leaf */
  547. for(i = 0; i < (num_bhs - 1); i++) {
  548. eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
  549. el = &eb->h_list;
  550. /* finally, make our actual change to the
  551. * intermediate extent blocks. */
  552. next_free = le16_to_cpu(el->l_next_free_rec);
  553. le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
  554. new_clusters);
  555. status = ocfs2_journal_dirty(handle, eb_bhs[i]);
  556. if (status < 0)
  557. mlog_errno(status);
  558. }
  559. BUG_ON(i != (num_bhs - 1));
  560. /* note that the leaf block wasn't touched in
  561. * the loop above */
  562. eb = (struct ocfs2_extent_block *) eb_bhs[num_bhs - 1]->b_data;
  563. el = &eb->h_list;
  564. BUG_ON(el->l_tree_depth);
  565. }
  566. /* yay, we can finally add the actual extent now! */
  567. i = le16_to_cpu(el->l_next_free_rec) - 1;
  568. if (le16_to_cpu(el->l_next_free_rec) &&
  569. ocfs2_extent_contig(inode, &el->l_recs[i], start_blk)) {
  570. le32_add_cpu(&el->l_recs[i].e_clusters, new_clusters);
  571. } else if (le16_to_cpu(el->l_next_free_rec) &&
  572. (le32_to_cpu(el->l_recs[i].e_clusters) == 0)) {
  573. /* having an empty extent at eof is legal. */
  574. if (el->l_recs[i].e_cpos != fe->i_clusters) {
  575. ocfs2_error(inode->i_sb,
  576. "Dinode %"MLFu64" trailing extent is bad: "
  577. "cpos (%u) != number of clusters (%u)",
  578. le32_to_cpu(el->l_recs[i].e_cpos),
  579. le32_to_cpu(fe->i_clusters));
  580. status = -EIO;
  581. goto bail;
  582. }
  583. el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
  584. el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
  585. } else {
  586. /* No contiguous record, or no empty record at eof, so
  587. * we add a new one. */
  588. BUG_ON(le16_to_cpu(el->l_next_free_rec) >=
  589. le16_to_cpu(el->l_count));
  590. i = le16_to_cpu(el->l_next_free_rec);
  591. el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
  592. el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
  593. el->l_recs[i].e_cpos = fe->i_clusters;
  594. le16_add_cpu(&el->l_next_free_rec, 1);
  595. }
  596. /*
  597. * extent_map errors are not fatal, so they are ignored outside
  598. * of flushing the thing.
  599. */
  600. status = ocfs2_extent_map_append(inode, &el->l_recs[i],
  601. new_clusters);
  602. if (status) {
  603. mlog_errno(status);
  604. ocfs2_extent_map_drop(inode, le32_to_cpu(fe->i_clusters));
  605. }
  606. status = ocfs2_journal_dirty(handle, fe_bh);
  607. if (status < 0)
  608. mlog_errno(status);
  609. if (fe->id2.i_list.l_tree_depth) {
  610. status = ocfs2_journal_dirty(handle, eb_bhs[num_bhs - 1]);
  611. if (status < 0)
  612. mlog_errno(status);
  613. }
  614. status = 0;
  615. bail:
  616. if (eb_bhs) {
  617. for (i = 0; i < num_bhs; i++)
  618. if (eb_bhs[i])
  619. brelse(eb_bhs[i]);
  620. kfree(eb_bhs);
  621. }
  622. mlog_exit(status);
  623. return status;
  624. }
  625. /*
  626. * Should only be called when there is no space left in any of the
  627. * leaf nodes. What we want to do is find the lowest tree depth
  628. * non-leaf extent block with room for new records. There are three
  629. * valid results of this search:
  630. *
  631. * 1) a lowest extent block is found, then we pass it back in
  632. * *lowest_eb_bh and return '0'
  633. *
  634. * 2) the search fails to find anything, but the dinode has room. We
  635. * pass NULL back in *lowest_eb_bh, but still return '0'
  636. *
  637. * 3) the search fails to find anything AND the dinode is full, in
  638. * which case we return > 0
  639. *
  640. * return status < 0 indicates an error.
  641. */
  642. static int ocfs2_find_branch_target(struct ocfs2_super *osb,
  643. struct inode *inode,
  644. struct buffer_head *fe_bh,
  645. struct buffer_head **target_bh)
  646. {
  647. int status = 0, i;
  648. u64 blkno;
  649. struct ocfs2_dinode *fe;
  650. struct ocfs2_extent_block *eb;
  651. struct ocfs2_extent_list *el;
  652. struct buffer_head *bh = NULL;
  653. struct buffer_head *lowest_bh = NULL;
  654. mlog_entry_void();
  655. *target_bh = NULL;
  656. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  657. el = &fe->id2.i_list;
  658. while(le16_to_cpu(el->l_tree_depth) > 1) {
  659. if (le16_to_cpu(el->l_next_free_rec) == 0) {
  660. ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has empty "
  661. "extent list (next_free_rec == 0)",
  662. OCFS2_I(inode)->ip_blkno);
  663. status = -EIO;
  664. goto bail;
  665. }
  666. i = le16_to_cpu(el->l_next_free_rec) - 1;
  667. blkno = le64_to_cpu(el->l_recs[i].e_blkno);
  668. if (!blkno) {
  669. ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has extent "
  670. "list where extent # %d has no physical "
  671. "block start",
  672. OCFS2_I(inode)->ip_blkno, i);
  673. status = -EIO;
  674. goto bail;
  675. }
  676. if (bh) {
  677. brelse(bh);
  678. bh = NULL;
  679. }
  680. status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED,
  681. inode);
  682. if (status < 0) {
  683. mlog_errno(status);
  684. goto bail;
  685. }
  686. eb = (struct ocfs2_extent_block *) bh->b_data;
  687. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  688. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
  689. status = -EIO;
  690. goto bail;
  691. }
  692. el = &eb->h_list;
  693. if (le16_to_cpu(el->l_next_free_rec) <
  694. le16_to_cpu(el->l_count)) {
  695. if (lowest_bh)
  696. brelse(lowest_bh);
  697. lowest_bh = bh;
  698. get_bh(lowest_bh);
  699. }
  700. }
  701. /* If we didn't find one and the fe doesn't have any room,
  702. * then return '1' */
  703. if (!lowest_bh
  704. && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
  705. status = 1;
  706. *target_bh = lowest_bh;
  707. bail:
  708. if (bh)
  709. brelse(bh);
  710. mlog_exit(status);
  711. return status;
  712. }
  713. /* the caller needs to update fe->i_clusters */
  714. int ocfs2_insert_extent(struct ocfs2_super *osb,
  715. struct ocfs2_journal_handle *handle,
  716. struct inode *inode,
  717. struct buffer_head *fe_bh,
  718. u64 start_blk,
  719. u32 new_clusters,
  720. struct ocfs2_alloc_context *meta_ac)
  721. {
  722. int status, i, shift;
  723. struct buffer_head *last_eb_bh = NULL;
  724. struct buffer_head *bh = NULL;
  725. struct ocfs2_dinode *fe;
  726. struct ocfs2_extent_block *eb;
  727. struct ocfs2_extent_list *el;
  728. mlog_entry_void();
  729. mlog(0, "add %u clusters starting at block %"MLFu64" to "
  730. "inode %"MLFu64"\n",
  731. new_clusters, start_blk, OCFS2_I(inode)->ip_blkno);
  732. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  733. el = &fe->id2.i_list;
  734. if (el->l_tree_depth) {
  735. /* jump to end of tree */
  736. status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
  737. &last_eb_bh, OCFS2_BH_CACHED, inode);
  738. if (status < 0) {
  739. mlog_exit(status);
  740. goto bail;
  741. }
  742. eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
  743. el = &eb->h_list;
  744. }
  745. /* Can we allocate without adding/shifting tree bits? */
  746. i = le16_to_cpu(el->l_next_free_rec) - 1;
  747. if (le16_to_cpu(el->l_next_free_rec) == 0
  748. || (le16_to_cpu(el->l_next_free_rec) < le16_to_cpu(el->l_count))
  749. || le32_to_cpu(el->l_recs[i].e_clusters) == 0
  750. || ocfs2_extent_contig(inode, &el->l_recs[i], start_blk))
  751. goto out_add;
  752. mlog(0, "ocfs2_allocate_extent: couldn't do a simple add, traversing "
  753. "tree now.\n");
  754. shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh);
  755. if (shift < 0) {
  756. status = shift;
  757. mlog_errno(status);
  758. goto bail;
  759. }
  760. /* We traveled all the way to the bottom of the allocation tree
  761. * and didn't find room for any more extents - we need to add
  762. * another tree level */
  763. if (shift) {
  764. /* if we hit a leaf, we'd better be empty :) */
  765. BUG_ON(le16_to_cpu(el->l_next_free_rec) !=
  766. le16_to_cpu(el->l_count));
  767. BUG_ON(bh);
  768. mlog(0, "ocfs2_allocate_extent: need to shift tree depth "
  769. "(current = %u)\n",
  770. le16_to_cpu(fe->id2.i_list.l_tree_depth));
  771. /* ocfs2_shift_tree_depth will return us a buffer with
  772. * the new extent block (so we can pass that to
  773. * ocfs2_add_branch). */
  774. status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh,
  775. meta_ac, &bh);
  776. if (status < 0) {
  777. mlog_errno(status);
  778. goto bail;
  779. }
  780. /* Special case: we have room now if we shifted from
  781. * tree_depth 0 */
  782. if (fe->id2.i_list.l_tree_depth == cpu_to_le16(1))
  783. goto out_add;
  784. }
  785. /* call ocfs2_add_branch to add the final part of the tree with
  786. * the new data. */
  787. mlog(0, "ocfs2_allocate_extent: add branch. bh = %p\n", bh);
  788. status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh,
  789. meta_ac);
  790. if (status < 0) {
  791. mlog_errno(status);
  792. goto bail;
  793. }
  794. out_add:
  795. /* Finally, we can add clusters. */
  796. status = ocfs2_do_insert_extent(osb, handle, inode, fe_bh,
  797. start_blk, new_clusters);
  798. if (status < 0)
  799. mlog_errno(status);
  800. bail:
  801. if (bh)
  802. brelse(bh);
  803. if (last_eb_bh)
  804. brelse(last_eb_bh);
  805. mlog_exit(status);
  806. return status;
  807. }
  808. static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
  809. {
  810. struct buffer_head *tl_bh = osb->osb_tl_bh;
  811. struct ocfs2_dinode *di;
  812. struct ocfs2_truncate_log *tl;
  813. di = (struct ocfs2_dinode *) tl_bh->b_data;
  814. tl = &di->id2.i_dealloc;
  815. mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count),
  816. "slot %d, invalid truncate log parameters: used = "
  817. "%u, count = %u\n", osb->slot_num,
  818. le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count));
  819. return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
  820. }
  821. static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
  822. unsigned int new_start)
  823. {
  824. unsigned int tail_index;
  825. unsigned int current_tail;
  826. /* No records, nothing to coalesce */
  827. if (!le16_to_cpu(tl->tl_used))
  828. return 0;
  829. tail_index = le16_to_cpu(tl->tl_used) - 1;
  830. current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start);
  831. current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters);
  832. return current_tail == new_start;
  833. }
/*
 * Append a deallocation of num_clusters clusters starting at start_blk
 * to this node's truncate log.  Coalesces with the tail record when the
 * new range is contiguous with it.
 *
 * Caller must hold tl_inode->i_mutex and must have flushed the log
 * beforehand if it was full (ocfs2_truncate_log_needs_flush()); a full
 * log here returns -ENOSPC.  The update is made under the caller's
 * journal handle.  Returns 0 on success, negative errno on failure.
 */
static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
				     struct ocfs2_journal_handle *handle,
				     u64 start_blk,
				     unsigned int num_clusters)
{
	int status, index;
	unsigned int start_cluster, tl_count;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct buffer_head *tl_bh = osb->osb_tl_bh;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;

	mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk,
		   num_clusters);

	/* mutex_trylock() succeeding would mean the caller did NOT
	 * hold i_mutex as required. */
	BUG_ON(mutex_trylock(&tl_inode->i_mutex));

	start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);

	di = (struct ocfs2_dinode *) tl_bh->b_data;
	tl = &di->id2.i_dealloc;
	if (!OCFS2_IS_VALID_DINODE(di)) {
		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
		status = -EIO;
		goto bail;
	}

	/* Sanity-check the on-disk record count against what this
	 * superblock geometry allows. */
	tl_count = le16_to_cpu(tl->tl_count);
	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
			tl_count == 0,
			"Truncate record count on #%"MLFu64" invalid ("
			"wanted %u, actual %u\n", OCFS2_I(tl_inode)->ip_blkno,
			ocfs2_truncate_recs_per_inode(osb->sb),
			le16_to_cpu(tl->tl_count));

	/* Caller should have known to flush before calling us. */
	index = le16_to_cpu(tl->tl_used);
	if (index >= tl_count) {
		status = -ENOSPC;
		mlog_errno(status);
		goto bail;
	}

	/* Get journal write access before we modify the log block. */
	status = ocfs2_journal_access(handle, tl_inode, tl_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	mlog(0, "Log truncate of %u clusters starting at cluster %u to "
	     "%"MLFu64" (index = %d)\n", num_clusters, start_cluster,
	     OCFS2_I(tl_inode)->ip_blkno, index);

	if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
		/*
		 * Move index back to the record we are coalescing with.
		 * ocfs2_truncate_log_can_coalesce() guarantees nonzero
		 */
		index--;

		/* Grow the existing tail record; tl_used is unchanged. */
		num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
		mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n",
		     index, le32_to_cpu(tl->tl_recs[index].t_start),
		     num_clusters);
	} else {
		/* Start a fresh record and bump the used count. */
		tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
		tl->tl_used = cpu_to_le16(index + 1);
	}
	tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);

	status = ocfs2_journal_dirty(handle, tl_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

bail:
	mlog_exit(status);
	return status;
}
/*
 * Walk the truncate log from the newest record backwards, freeing each
 * record's clusters back to the global bitmap.
 *
 * For each record, tl_used is decremented and journaled BEFORE the
 * clusters are freed — so a crash mid-way can at worst leak clusters,
 * never double-free them.  The caller supplies a started handle with
 * enough credits for the first log update; we extend it per record.
 */
static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
					 struct ocfs2_journal_handle *handle,
					 struct inode *data_alloc_inode,
					 struct buffer_head *data_alloc_bh)
{
	int status = 0;
	int i;
	unsigned int num_clusters;
	u64 start_blk;
	struct ocfs2_truncate_rec rec;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct buffer_head *tl_bh = osb->osb_tl_bh;

	mlog_entry_void();

	di = (struct ocfs2_dinode *) tl_bh->b_data;
	tl = &di->id2.i_dealloc;
	/* Process newest-first so tl_used = i always describes the
	 * records not yet replayed. */
	i = le16_to_cpu(tl->tl_used) - 1;
	while (i >= 0) {
		/* Caller has given us at least enough credits to
		 * update the truncate log dinode */
		status = ocfs2_journal_access(handle, tl_inode, tl_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		/* Retire record i from the log before freeing its
		 * clusters (leak-not-corrupt on crash). */
		tl->tl_used = cpu_to_le16(i);

		status = ocfs2_journal_dirty(handle, tl_bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		/* TODO: Perhaps we can calculate the bulk of the
		 * credits up front rather than extending like
		 * this. */
		status = ocfs2_extend_trans(handle,
					    OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		/* Copy the record out by value; the log block stays
		 * journaled. */
		rec = tl->tl_recs[i];
		start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
						     le32_to_cpu(rec.t_start));
		num_clusters = le32_to_cpu(rec.t_clusters);

		/* if start_blk is not set, we ignore the record as
		 * invalid. */
		if (start_blk) {
			mlog(0, "free record %d, start = %u, clusters = %u\n",
			     i, le32_to_cpu(rec.t_start), num_clusters);

			status = ocfs2_free_clusters(handle, data_alloc_inode,
						     data_alloc_bh, start_blk,
						     num_clusters);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}
		}
		i--;
	}

bail:
	mlog_exit(status);
	return status;
}
  968. /* Expects you to already be holding tl_inode->i_mutex */
/* Expects you to already be holding tl_inode->i_mutex */
/*
 * Flush every record in this node's truncate log: lock the global
 * bitmap, start a transaction, and replay/free all logged clusters via
 * ocfs2_replay_truncate_records().  An empty log returns 0 immediately.
 */
static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
{
	int status;
	unsigned int num_to_flush;
	struct ocfs2_journal_handle *handle = NULL;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct inode *data_alloc_inode = NULL;
	struct buffer_head *tl_bh = osb->osb_tl_bh;
	struct buffer_head *data_alloc_bh = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;

	mlog_entry_void();

	/* trylock succeeding would mean the caller broke the locking
	 * contract stated above. */
	BUG_ON(mutex_trylock(&tl_inode->i_mutex));

	di = (struct ocfs2_dinode *) tl_bh->b_data;
	tl = &di->id2.i_dealloc;
	if (!OCFS2_IS_VALID_DINODE(di)) {
		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
		status = -EIO;
		goto bail;
	}

	num_to_flush = le16_to_cpu(tl->tl_used);
	mlog(0, "Flush %u records from truncate log #%"MLFu64"\n",
	     num_to_flush, OCFS2_I(tl_inode)->ip_blkno);
	if (!num_to_flush) {
		status = 0;
		goto bail;
	}

	/* Pre-allocate the handle so the meta lock below can be
	 * attached to it. */
	handle = ocfs2_alloc_handle(osb);
	if (!handle) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	data_alloc_inode = ocfs2_get_system_file_inode(osb,
						       GLOBAL_BITMAP_SYSTEM_INODE,
						       OCFS2_INVALID_SLOT);
	if (!data_alloc_inode) {
		status = -EINVAL;
		mlog(ML_ERROR, "Could not get bitmap inode!\n");
		goto bail;
	}

	ocfs2_handle_add_inode(handle, data_alloc_inode);
	status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* NOTE(review): on failure ocfs2_start_trans() appears to
	 * consume the pre-allocated handle — we NULL it here so the
	 * bail path does not commit it; confirm against journal.c. */
	handle = ocfs2_start_trans(osb, handle, OCFS2_TRUNCATE_LOG_UPDATE);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
					       data_alloc_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

bail:
	/* Committing the handle also releases the meta lock taken via
	 * ocfs2_handle_add_inode()/ocfs2_meta_lock() above. */
	if (handle)
		ocfs2_commit_trans(handle);

	if (data_alloc_inode)
		iput(data_alloc_inode);

	if (data_alloc_bh)
		brelse(data_alloc_bh);

	mlog_exit(status);
	return status;
}
  1039. int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
  1040. {
  1041. int status;
  1042. struct inode *tl_inode = osb->osb_tl_inode;
  1043. mutex_lock(&tl_inode->i_mutex);
  1044. status = __ocfs2_flush_truncate_log(osb);
  1045. mutex_unlock(&tl_inode->i_mutex);
  1046. return status;
  1047. }
  1048. static void ocfs2_truncate_log_worker(void *data)
  1049. {
  1050. int status;
  1051. struct ocfs2_super *osb = data;
  1052. mlog_entry_void();
  1053. status = ocfs2_flush_truncate_log(osb);
  1054. if (status < 0)
  1055. mlog_errno(status);
  1056. mlog_exit(status);
  1057. }
  1058. #define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
  1059. void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
  1060. int cancel)
  1061. {
  1062. if (osb->osb_tl_inode) {
  1063. /* We want to push off log flushes while truncates are
  1064. * still running. */
  1065. if (cancel)
  1066. cancel_delayed_work(&osb->osb_truncate_log_wq);
  1067. queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq,
  1068. OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
  1069. }
  1070. }
  1071. static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
  1072. int slot_num,
  1073. struct inode **tl_inode,
  1074. struct buffer_head **tl_bh)
  1075. {
  1076. int status;
  1077. struct inode *inode = NULL;
  1078. struct buffer_head *bh = NULL;
  1079. inode = ocfs2_get_system_file_inode(osb,
  1080. TRUNCATE_LOG_SYSTEM_INODE,
  1081. slot_num);
  1082. if (!inode) {
  1083. status = -EINVAL;
  1084. mlog(ML_ERROR, "Could not get load truncate log inode!\n");
  1085. goto bail;
  1086. }
  1087. status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
  1088. OCFS2_BH_CACHED, inode);
  1089. if (status < 0) {
  1090. iput(inode);
  1091. mlog_errno(status);
  1092. goto bail;
  1093. }
  1094. *tl_inode = inode;
  1095. *tl_bh = bh;
  1096. bail:
  1097. mlog_exit(status);
  1098. return status;
  1099. }
  1100. /* called during the 1st stage of node recovery. we stamp a clean
  1101. * truncate log and pass back a copy for processing later. if the
  1102. * truncate log does not require processing, a *tl_copy is set to
  1103. * NULL. */
int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
				      int slot_num,
				      struct ocfs2_dinode **tl_copy)
{
	int status;
	struct inode *tl_inode = NULL;
	struct buffer_head *tl_bh = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;

	/* *tl_copy stays NULL when the dead node's log is empty. */
	*tl_copy = NULL;

	mlog(0, "recover truncate log from slot %d\n", slot_num);

	status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	di = (struct ocfs2_dinode *) tl_bh->b_data;
	tl = &di->id2.i_dealloc;
	if (!OCFS2_IS_VALID_DINODE(di)) {
		OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
		status = -EIO;
		goto bail;
	}

	if (le16_to_cpu(tl->tl_used)) {
		mlog(0, "We'll have %u logs to recover\n",
		     le16_to_cpu(tl->tl_used));

		/* Copy the whole log block for the caller; kfree()'d by
		 * us on error, otherwise owned by the caller. */
		*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
		if (!(*tl_copy)) {
			status = -ENOMEM;
			mlog_errno(status);
			goto bail;
		}

		/* Assuming the write-out below goes well, this copy
		 * will be passed back to recovery for processing. */
		memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);

		/* All we need to do to clear the truncate log is set
		 * tl_used. */
		tl->tl_used = 0;

		/* Stamp the cleared log to disk before recovery
		 * proceeds, so the records cannot be replayed twice. */
		status = ocfs2_write_block(osb, tl_bh, tl_inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

bail:
	if (tl_inode)
		iput(tl_inode);
	if (tl_bh)
		brelse(tl_bh);

	/* Never hand back a copy on failure. */
	if (status < 0 && (*tl_copy)) {
		kfree(*tl_copy);
		*tl_copy = NULL;
	}

	mlog_exit(status);
	return status;
}
  1160. int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
  1161. struct ocfs2_dinode *tl_copy)
  1162. {
  1163. int status = 0;
  1164. int i;
  1165. unsigned int clusters, num_recs, start_cluster;
  1166. u64 start_blk;
  1167. struct ocfs2_journal_handle *handle;
  1168. struct inode *tl_inode = osb->osb_tl_inode;
  1169. struct ocfs2_truncate_log *tl;
  1170. mlog_entry_void();
  1171. if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
  1172. mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
  1173. return -EINVAL;
  1174. }
  1175. tl = &tl_copy->id2.i_dealloc;
  1176. num_recs = le16_to_cpu(tl->tl_used);
  1177. mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs,
  1178. tl_copy->i_blkno);
  1179. mutex_lock(&tl_inode->i_mutex);
  1180. for(i = 0; i < num_recs; i++) {
  1181. if (ocfs2_truncate_log_needs_flush(osb)) {
  1182. status = __ocfs2_flush_truncate_log(osb);
  1183. if (status < 0) {
  1184. mlog_errno(status);
  1185. goto bail_up;
  1186. }
  1187. }
  1188. handle = ocfs2_start_trans(osb, NULL,
  1189. OCFS2_TRUNCATE_LOG_UPDATE);
  1190. if (IS_ERR(handle)) {
  1191. status = PTR_ERR(handle);
  1192. mlog_errno(status);
  1193. goto bail_up;
  1194. }
  1195. clusters = le32_to_cpu(tl->tl_recs[i].t_clusters);
  1196. start_cluster = le32_to_cpu(tl->tl_recs[i].t_start);
  1197. start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster);
  1198. status = ocfs2_truncate_log_append(osb, handle,
  1199. start_blk, clusters);
  1200. ocfs2_commit_trans(handle);
  1201. if (status < 0) {
  1202. mlog_errno(status);
  1203. goto bail_up;
  1204. }
  1205. }
  1206. bail_up:
  1207. mutex_unlock(&tl_inode->i_mutex);
  1208. mlog_exit(status);
  1209. return status;
  1210. }
/*
 * Tear down the truncate log state on the osb: stop the delayed flush
 * worker, flush any remaining records, and release the log buffer and
 * inode.  Safe to call when init failed — keyed on osb_tl_inode.
 */
void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
{
	int status;
	struct inode *tl_inode = osb->osb_tl_inode;

	mlog_entry_void();

	if (tl_inode) {
		/* Cancel any pending delayed flush and wait out a
		 * worker already running before flushing by hand. */
		cancel_delayed_work(&osb->osb_truncate_log_wq);
		flush_workqueue(ocfs2_wq);

		status = ocfs2_flush_truncate_log(osb);
		if (status < 0)
			mlog_errno(status);

		brelse(osb->osb_tl_bh);
		iput(osb->osb_tl_inode);
	}

	mlog_exit_void();
}
/*
 * Load this node's truncate log inode/block at mount time and arm the
 * delayed flush work.  Returns the lookup status; on failure the osb
 * fields end up NULL (see below), which shutdown treats as "never
 * initialized".
 */
int ocfs2_truncate_log_init(struct ocfs2_super *osb)
{
	int status;
	struct inode *tl_inode = NULL;
	struct buffer_head *tl_bh = NULL;

	mlog_entry_void();

	status = ocfs2_get_truncate_log_info(osb,
					     osb->slot_num,
					     &tl_inode,
					     &tl_bh);
	if (status < 0)
		mlog_errno(status);

	/* ocfs2_truncate_log_shutdown keys on the existence of
	 * osb->osb_tl_inode so we don't set any of the osb variables
	 * until we're sure all is well.  (On failure tl_inode/tl_bh
	 * are still NULL here, so the stores below are harmless.) */
	INIT_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker, osb);

	osb->osb_tl_bh    = tl_bh;
	osb->osb_tl_inode = tl_inode;

	mlog_exit(status);
	return status;
}
  1248. /* This function will figure out whether the currently last extent
  1249. * block will be deleted, and if it will, what the new last extent
  1250. * block will be so we can update his h_next_leaf_blk field, as well
  1251. * as the dinodes i_last_eb_blk */
static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
				       struct inode *inode,
				       struct ocfs2_dinode *fe,
				       u32 new_i_clusters,
				       struct buffer_head *old_last_eb,
				       struct buffer_head **new_last_eb)
{
	int i, status = 0;
	u64 block = 0;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct buffer_head *bh = NULL;

	/* *new_last_eb stays NULL when the last extent block will not
	 * change; on success the caller owns the extra bh reference. */
	*new_last_eb = NULL;

	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto bail;
	}

	/* we have no tree, so of course, no last_eb. */
	if (!fe->id2.i_list.l_tree_depth)
		goto bail;

	/* trunc to zero special case - this makes tree_depth = 0
	 * regardless of what it is. */
	if (!new_i_clusters)
		goto bail;

	eb = (struct ocfs2_extent_block *) old_last_eb->b_data;
	el = &(eb->h_list);
	BUG_ON(!el->l_next_free_rec);

	/* Make sure that this guy will actually be empty after we
	 * clear away the data. */
	if (le32_to_cpu(el->l_recs[0].e_cpos) < new_i_clusters)
		goto bail;

	/* Ok, at this point, we know that last_eb will definitely
	 * change, so lets traverse the tree and find the second to
	 * last extent block. */
	el = &(fe->id2.i_list);
	/* go down the tree, */
	do {
		/* Rightmost record still covering data below the new
		 * cluster count leads toward the new last leaf. */
		for(i = (le16_to_cpu(el->l_next_free_rec) - 1); i >= 0; i--) {
			if (le32_to_cpu(el->l_recs[i].e_cpos) <
			    new_i_clusters) {
				block = le64_to_cpu(el->l_recs[i].e_blkno);
				break;
			}
		}
		/* new_i_clusters > 0 guarantees some record matched. */
		BUG_ON(i < 0);

		if (bh) {
			brelse(bh);
			bh = NULL;
		}

		status = ocfs2_read_block(osb, block, &bh, OCFS2_BH_CACHED,
					  inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		eb = (struct ocfs2_extent_block *) bh->b_data;
		el = &eb->h_list;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			status = -EIO;
			goto bail;
		}
	} while (el->l_tree_depth);

	/* Take an extra reference for the caller; our own 'bh' ref is
	 * dropped at bail below. */
	*new_last_eb = bh;
	get_bh(*new_last_eb);
	mlog(0, "returning block %"MLFu64"\n", le64_to_cpu(eb->h_blkno));
bail:
	if (bh)
		brelse(bh);

	return status;
}
  1324. static int ocfs2_do_truncate(struct ocfs2_super *osb,
  1325. unsigned int clusters_to_del,
  1326. struct inode *inode,
  1327. struct buffer_head *fe_bh,
  1328. struct buffer_head *old_last_eb_bh,
  1329. struct ocfs2_journal_handle *handle,
  1330. struct ocfs2_truncate_context *tc)
  1331. {
  1332. int status, i, depth;
  1333. struct ocfs2_dinode *fe;
  1334. struct ocfs2_extent_block *eb;
  1335. struct ocfs2_extent_block *last_eb = NULL;
  1336. struct ocfs2_extent_list *el;
  1337. struct buffer_head *eb_bh = NULL;
  1338. struct buffer_head *last_eb_bh = NULL;
  1339. u64 next_eb = 0;
  1340. u64 delete_blk = 0;
  1341. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  1342. status = ocfs2_find_new_last_ext_blk(osb,
  1343. inode,
  1344. fe,
  1345. le32_to_cpu(fe->i_clusters) -
  1346. clusters_to_del,
  1347. old_last_eb_bh,
  1348. &last_eb_bh);
  1349. if (status < 0) {
  1350. mlog_errno(status);
  1351. goto bail;
  1352. }
  1353. if (last_eb_bh)
  1354. last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
  1355. status = ocfs2_journal_access(handle, inode, fe_bh,
  1356. OCFS2_JOURNAL_ACCESS_WRITE);
  1357. if (status < 0) {
  1358. mlog_errno(status);
  1359. goto bail;
  1360. }
  1361. el = &(fe->id2.i_list);
  1362. spin_lock(&OCFS2_I(inode)->ip_lock);
  1363. OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
  1364. clusters_to_del;
  1365. spin_unlock(&OCFS2_I(inode)->ip_lock);
  1366. le32_add_cpu(&fe->i_clusters, -clusters_to_del);
  1367. fe->i_mtime = cpu_to_le64(CURRENT_TIME.tv_sec);
  1368. fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec);
  1369. i = le16_to_cpu(el->l_next_free_rec) - 1;
  1370. BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
  1371. le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
  1372. /* tree depth zero, we can just delete the clusters, otherwise
  1373. * we need to record the offset of the next level extent block
  1374. * as we may overwrite it. */
  1375. if (!el->l_tree_depth)
  1376. delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
  1377. + ocfs2_clusters_to_blocks(osb->sb,
  1378. le32_to_cpu(el->l_recs[i].e_clusters));
  1379. else
  1380. next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
  1381. if (!el->l_recs[i].e_clusters) {
  1382. /* if we deleted the whole extent record, then clear
  1383. * out the other fields and update the extent
  1384. * list. For depth > 0 trees, we've already recorded
  1385. * the extent block in 'next_eb' */
  1386. el->l_recs[i].e_cpos = 0;
  1387. el->l_recs[i].e_blkno = 0;
  1388. BUG_ON(!el->l_next_free_rec);
  1389. le16_add_cpu(&el->l_next_free_rec, -1);
  1390. }
  1391. depth = le16_to_cpu(el->l_tree_depth);
  1392. if (!fe->i_clusters) {
  1393. /* trunc to zero is a special case. */
  1394. el->l_tree_depth = 0;
  1395. fe->i_last_eb_blk = 0;
  1396. } else if (last_eb)
  1397. fe->i_last_eb_blk = last_eb->h_blkno;
  1398. status = ocfs2_journal_dirty(handle, fe_bh);
  1399. if (status < 0) {
  1400. mlog_errno(status);
  1401. goto bail;
  1402. }
  1403. if (last_eb) {
  1404. /* If there will be a new last extent block, then by
  1405. * definition, there cannot be any leaves to the right of
  1406. * him. */
  1407. status = ocfs2_journal_access(handle, inode, last_eb_bh,
  1408. OCFS2_JOURNAL_ACCESS_WRITE);
  1409. if (status < 0) {
  1410. mlog_errno(status);
  1411. goto bail;
  1412. }
  1413. last_eb->h_next_leaf_blk = 0;
  1414. status = ocfs2_journal_dirty(handle, last_eb_bh);
  1415. if (status < 0) {
  1416. mlog_errno(status);
  1417. goto bail;
  1418. }
  1419. }
  1420. /* if our tree depth > 0, update all the tree blocks below us. */
  1421. while (depth) {
  1422. mlog(0, "traveling tree (depth = %d, next_eb = %"MLFu64")\n",
  1423. depth, next_eb);
  1424. status = ocfs2_read_block(osb, next_eb, &eb_bh,
  1425. OCFS2_BH_CACHED, inode);
  1426. if (status < 0) {
  1427. mlog_errno(status);
  1428. goto bail;
  1429. }
  1430. eb = (struct ocfs2_extent_block *)eb_bh->b_data;
  1431. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  1432. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
  1433. status = -EIO;
  1434. goto bail;
  1435. }
  1436. el = &(eb->h_list);
  1437. status = ocfs2_journal_access(handle, inode, eb_bh,
  1438. OCFS2_JOURNAL_ACCESS_WRITE);
  1439. if (status < 0) {
  1440. mlog_errno(status);
  1441. goto bail;
  1442. }
  1443. BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
  1444. BUG_ON(depth != (le16_to_cpu(el->l_tree_depth) + 1));
  1445. i = le16_to_cpu(el->l_next_free_rec) - 1;
  1446. mlog(0, "extent block %"MLFu64", before: record %d: "
  1447. "(%u, %u, %"MLFu64"), next = %u\n",
  1448. le64_to_cpu(eb->h_blkno), i,
  1449. le32_to_cpu(el->l_recs[i].e_cpos),
  1450. le32_to_cpu(el->l_recs[i].e_clusters),
  1451. le64_to_cpu(el->l_recs[i].e_blkno),
  1452. le16_to_cpu(el->l_next_free_rec));
  1453. BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
  1454. le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
  1455. next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
  1456. /* bottom-most block requires us to delete data.*/
  1457. if (!el->l_tree_depth)
  1458. delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
  1459. + ocfs2_clusters_to_blocks(osb->sb,
  1460. le32_to_cpu(el->l_recs[i].e_clusters));
  1461. if (!el->l_recs[i].e_clusters) {
  1462. el->l_recs[i].e_cpos = 0;
  1463. el->l_recs[i].e_blkno = 0;
  1464. BUG_ON(!el->l_next_free_rec);
  1465. le16_add_cpu(&el->l_next_free_rec, -1);
  1466. }
  1467. mlog(0, "extent block %"MLFu64", after: record %d: "
  1468. "(%u, %u, %"MLFu64"), next = %u\n",
  1469. le64_to_cpu(eb->h_blkno), i,
  1470. le32_to_cpu(el->l_recs[i].e_cpos),
  1471. le32_to_cpu(el->l_recs[i].e_clusters),
  1472. le64_to_cpu(el->l_recs[i].e_blkno),
  1473. le16_to_cpu(el->l_next_free_rec));
  1474. status = ocfs2_journal_dirty(handle, eb_bh);
  1475. if (status < 0) {
  1476. mlog_errno(status);
  1477. goto bail;
  1478. }
  1479. if (!el->l_next_free_rec) {
  1480. mlog(0, "deleting this extent block.\n");
  1481. ocfs2_remove_from_cache(inode, eb_bh);
  1482. BUG_ON(eb->h_suballoc_slot);
  1483. BUG_ON(el->l_recs[0].e_clusters);
  1484. BUG_ON(el->l_recs[0].e_cpos);
  1485. BUG_ON(el->l_recs[0].e_blkno);
  1486. status = ocfs2_free_extent_block(handle,
  1487. tc->tc_ext_alloc_inode,
  1488. tc->tc_ext_alloc_bh,
  1489. eb);
  1490. if (status < 0) {
  1491. mlog_errno(status);
  1492. goto bail;
  1493. }
  1494. }
  1495. brelse(eb_bh);
  1496. eb_bh = NULL;
  1497. depth--;
  1498. }
  1499. BUG_ON(!delete_blk);
  1500. status = ocfs2_truncate_log_append(osb, handle, delete_blk,
  1501. clusters_to_del);
  1502. if (status < 0) {
  1503. mlog_errno(status);
  1504. goto bail;
  1505. }
  1506. status = 0;
  1507. bail:
  1508. if (!status)
  1509. ocfs2_extent_map_trunc(inode, le32_to_cpu(fe->i_clusters));
  1510. else
  1511. ocfs2_extent_map_drop(inode, 0);
  1512. mlog_exit(status);
  1513. return status;
  1514. }
  1515. /*
  1516. * It is expected, that by the time you call this function,
  1517. * inode->i_size and fe->i_size have been adjusted.
  1518. *
  1519. * WARNING: This will kfree the truncate context
  1520. */
int ocfs2_commit_truncate(struct ocfs2_super *osb,
			  struct inode *inode,
			  struct buffer_head *fe_bh,
			  struct ocfs2_truncate_context *tc)
{
	int status, i, credits, tl_sem = 0;
	u32 clusters_to_del, target_i_clusters;
	u64 last_eb = 0;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct buffer_head *last_eb_bh;
	struct ocfs2_journal_handle *handle = NULL;
	struct inode *tl_inode = osb->osb_tl_inode;

	mlog_entry_void();

	down_write(&OCFS2_I(inode)->ip_alloc_sem);

	/* The cluster count we are truncating down to. */
	target_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
						     i_size_read(inode));

	/* Take ownership of the last-eb buffer from the truncate
	 * context; we release it ourselves at bail. */
	last_eb_bh = tc->tc_last_eb_bh;
	tc->tc_last_eb_bh = NULL;

	fe = (struct ocfs2_dinode *) fe_bh->b_data;

	if (fe->id2.i_list.l_tree_depth) {
		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
		el = &eb->h_list;
	} else
		el = &fe->id2.i_list;
	last_eb = le64_to_cpu(fe->i_last_eb_blk);
start:
	/* One pass of this loop removes at most one extent record's
	 * worth of clusters; we loop until i_clusters hits target. */
	mlog(0, "ocfs2_commit_truncate: fe->i_clusters = %u, "
	     "last_eb = %"MLFu64", fe->i_last_eb_blk = %"MLFu64", "
	     "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n",
	     le32_to_cpu(fe->i_clusters), last_eb,
	     le64_to_cpu(fe->i_last_eb_blk),
	     le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh);

	if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) {
		mlog(0, "last_eb changed!\n");
		BUG_ON(!fe->id2.i_list.l_tree_depth);
		last_eb = le64_to_cpu(fe->i_last_eb_blk);
		/* i_last_eb_blk may have changed, read it if
		 * necessary. We don't have to worry about the
		 * truncate to zero case here (where there becomes no
		 * last_eb) because we never loop back after our work
		 * is done. */
		if (last_eb_bh) {
			brelse(last_eb_bh);
			last_eb_bh = NULL;
		}
		status = ocfs2_read_block(osb, last_eb,
					  &last_eb_bh, OCFS2_BH_CACHED,
					  inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			status = -EIO;
			goto bail;
		}
		el = &(eb->h_list);
	}

	/* by now, el will point to the extent list on the bottom most
	 * portion of this tree. */
	i = le16_to_cpu(el->l_next_free_rec) - 1;
	/* Delete the whole last record if it lies entirely past the
	 * target, otherwise just its tail. */
	if (le32_to_cpu(el->l_recs[i].e_cpos) >= target_i_clusters)
		clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters);
	else
		clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) +
				   le32_to_cpu(el->l_recs[i].e_cpos)) -
				  target_i_clusters;

	mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);

	mutex_lock(&tl_inode->i_mutex);
	/* tl_sem tracks whether bail must drop the mutex. */
	tl_sem = 1;
	/* ocfs2_truncate_log_needs_flush guarantees us at least one
	 * record is free for use. If there isn't any, we flush to get
	 * an empty truncate log. */
	if (ocfs2_truncate_log_needs_flush(osb)) {
		status = __ocfs2_flush_truncate_log(osb);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
						fe, el);
	handle = ocfs2_start_trans(osb, NULL, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto bail;
	}

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
	if (status < 0)
		mlog_errno(status);

	status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh,
				   last_eb_bh, handle, tc);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	mutex_unlock(&tl_inode->i_mutex);
	tl_sem = 0;

	ocfs2_commit_trans(handle);
	handle = NULL;

	/* Loop for another pass while there is more to remove. */
	BUG_ON(le32_to_cpu(fe->i_clusters) < target_i_clusters);
	if (le32_to_cpu(fe->i_clusters) > target_i_clusters)
		goto start;
bail:
	up_write(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_schedule_truncate_log_flush(osb, 1);

	if (tl_sem)
		mutex_unlock(&tl_inode->i_mutex);

	if (handle)
		ocfs2_commit_trans(handle);

	if (last_eb_bh)
		brelse(last_eb_bh);

	/* This will drop the ext_alloc cluster lock for us */
	ocfs2_free_truncate_context(tc);

	mlog_exit(status);
	return status;
}
  1645. /*
  1646. * Expects the inode to already be locked. This will figure out which
  1647. * inodes need to be locked and will put them on the returned truncate
  1648. * context.
  1649. */
  1650. int ocfs2_prepare_truncate(struct ocfs2_super *osb,
  1651. struct inode *inode,
  1652. struct buffer_head *fe_bh,
  1653. struct ocfs2_truncate_context **tc)
  1654. {
  1655. int status, metadata_delete;
  1656. unsigned int new_i_clusters;
  1657. struct ocfs2_dinode *fe;
  1658. struct ocfs2_extent_block *eb;
  1659. struct ocfs2_extent_list *el;
  1660. struct buffer_head *last_eb_bh = NULL;
  1661. struct inode *ext_alloc_inode = NULL;
  1662. struct buffer_head *ext_alloc_bh = NULL;
  1663. mlog_entry_void();
  1664. *tc = NULL;
  1665. new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
  1666. i_size_read(inode));
  1667. fe = (struct ocfs2_dinode *) fe_bh->b_data;
  1668. mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
  1669. "%"MLFu64"\n", fe->i_clusters, new_i_clusters, fe->i_size);
  1670. if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) {
  1671. ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has cluster count "
  1672. "%u and size %"MLFu64" whereas struct inode has "
  1673. "cluster count %u and size %llu which caused an "
  1674. "invalid truncate to %u clusters.",
  1675. le64_to_cpu(fe->i_blkno),
  1676. le32_to_cpu(fe->i_clusters),
  1677. le64_to_cpu(fe->i_size),
  1678. OCFS2_I(inode)->ip_clusters, i_size_read(inode),
  1679. new_i_clusters);
  1680. mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres);
  1681. status = -EIO;
  1682. goto bail;
  1683. }
  1684. *tc = kcalloc(1, sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
  1685. if (!(*tc)) {
  1686. status = -ENOMEM;
  1687. mlog_errno(status);
  1688. goto bail;
  1689. }
  1690. metadata_delete = 0;
  1691. if (fe->id2.i_list.l_tree_depth) {
  1692. /* If we have a tree, then the truncate may result in
  1693. * metadata deletes. Figure this out from the
  1694. * rightmost leaf block.*/
  1695. status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
  1696. &last_eb_bh, OCFS2_BH_CACHED, inode);
  1697. if (status < 0) {
  1698. mlog_errno(status);
  1699. goto bail;
  1700. }
  1701. eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
  1702. if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
  1703. OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
  1704. brelse(last_eb_bh);
  1705. status = -EIO;
  1706. goto bail;
  1707. }
  1708. el = &(eb->h_list);
  1709. if (le32_to_cpu(el->l_recs[0].e_cpos) >= new_i_clusters)
  1710. metadata_delete = 1;
  1711. }
  1712. (*tc)->tc_last_eb_bh = last_eb_bh;
  1713. if (metadata_delete) {
  1714. mlog(0, "Will have to delete metadata for this trunc. "
  1715. "locking allocator.\n");
  1716. ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
  1717. if (!ext_alloc_inode) {
  1718. status = -ENOMEM;
  1719. mlog_errno(status);
  1720. goto bail;
  1721. }
  1722. mutex_lock(&ext_alloc_inode->i_mutex);
  1723. (*tc)->tc_ext_alloc_inode = ext_alloc_inode;
  1724. status = ocfs2_meta_lock(ext_alloc_inode,
  1725. NULL,
  1726. &ext_alloc_bh,
  1727. 1);
  1728. if (status < 0) {
  1729. mlog_errno(status);
  1730. goto bail;
  1731. }
  1732. (*tc)->tc_ext_alloc_bh = ext_alloc_bh;
  1733. (*tc)->tc_ext_alloc_locked = 1;
  1734. }
  1735. status = 0;
  1736. bail:
  1737. if (status < 0) {
  1738. if (*tc)
  1739. ocfs2_free_truncate_context(*tc);
  1740. *tc = NULL;
  1741. }
  1742. mlog_exit_void();
  1743. return status;
  1744. }
  1745. static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
  1746. {
  1747. if (tc->tc_ext_alloc_inode) {
  1748. if (tc->tc_ext_alloc_locked)
  1749. ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
  1750. mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex);
  1751. iput(tc->tc_ext_alloc_inode);
  1752. }
  1753. if (tc->tc_ext_alloc_bh)
  1754. brelse(tc->tc_ext_alloc_bh);
  1755. if (tc->tc_last_eb_bh)
  1756. brelse(tc->tc_last_eb_bh);
  1757. kfree(tc);
  1758. }