  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * dir.c
  5. *
  6. * Creates, reads, walks and deletes directory-nodes
  7. *
  8. * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  9. *
  10. * Portions of this code from linux/fs/ext3/dir.c
  11. *
  12. * Copyright (C) 1992, 1993, 1994, 1995
  13. * Remy Card (card@masi.ibp.fr)
  14. * Laboratoire MASI - Institut Blaise Pascal
  15. * Universite Pierre et Marie Curie (Paris VI)
  16. *
  17. * from
  18. *
  19. * linux/fs/minix/dir.c
  20. *
  21. * Copyright (C) 1991, 1992 Linus Torvalds
  22. *
  23. * This program is free software; you can redistribute it and/or
  24. * modify it under the terms of the GNU General Public
  25. * License as published by the Free Software Foundation; either
  26. * version 2 of the License, or (at your option) any later version.
  27. *
  28. * This program is distributed in the hope that it will be useful,
  29. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  30. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  31. * General Public License for more details.
  32. *
  33. * You should have received a copy of the GNU General Public
  34. * License along with this program; if not, write to the
  35. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  36. * Boston, MA 02111-1307, USA.
  37. */
  38. #include <linux/fs.h>
  39. #include <linux/types.h>
  40. #include <linux/slab.h>
  41. #include <linux/highmem.h>
  42. #include <linux/quotaops.h>
  43. #include <linux/sort.h>
  44. #define MLOG_MASK_PREFIX ML_NAMEI
  45. #include <cluster/masklog.h>
  46. #include "ocfs2.h"
  47. #include "alloc.h"
  48. #include "blockcheck.h"
  49. #include "dir.h"
  50. #include "dlmglue.h"
  51. #include "extent_map.h"
  52. #include "file.h"
  53. #include "inode.h"
  54. #include "journal.h"
  55. #include "namei.h"
  56. #include "suballoc.h"
  57. #include "super.h"
  58. #include "sysfile.h"
  59. #include "uptodate.h"
  60. #include "buffer_head_io.h"
  61. #define NAMEI_RA_CHUNKS 2
  62. #define NAMEI_RA_BLOCKS 4
  63. #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
  64. #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
  65. static unsigned char ocfs2_filetype_table[] = {
  66. DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
  67. };
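/*
 * Note (inferred, not stated in this file): the table order matches the
 * ext2/3-style on-disk file type codes (0 = unknown, 1 = regular file,
 * 2 = directory, 3 = chrdev, 4 = blkdev, 5 = fifo, 6 = socket,
 * 7 = symlink), so it is presumably indexed by ocfs2_dir_entry.file_type
 * when readdir-style results are filled in with DT_* values.
 */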
  68. static int ocfs2_do_extend_dir(struct super_block *sb,
  69. handle_t *handle,
  70. struct inode *dir,
  71. struct buffer_head *parent_fe_bh,
  72. struct ocfs2_alloc_context *data_ac,
  73. struct ocfs2_alloc_context *meta_ac,
  74. struct buffer_head **new_bh);
  75. /*
  76. * These are distinct checks because future versions of the file system will
  77. * want to have a trailing dirent structure independent of indexing.
  78. */
  79. static int ocfs2_dir_has_trailer(struct inode *dir)
  80. {
  81. if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
  82. return 0;
  83. return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb));
  84. }
  85. static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb)
  86. {
  87. return ocfs2_meta_ecc(osb);
  88. }
  89. static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
  90. {
  91. return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer);
  92. }
  93. #define ocfs2_trailer_from_bh(_bh, _sb) ((struct ocfs2_dir_block_trailer *) ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
  94. /* XXX ocfs2_block_dqtrailer() is similar but not quite - can we make
  95. * them more consistent? */
  96. struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
  97. void *data)
  98. {
  99. char *p = data;
  100. p += blocksize - sizeof(struct ocfs2_dir_block_trailer);
  101. return (struct ocfs2_dir_block_trailer *)p;
  102. }
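/*
 * Layout sketch: the trailer occupies the last
 * sizeof(struct ocfs2_dir_block_trailer) bytes of a directory block,
 * so a block looks roughly like
 *
 *   [ dirent | dirent | ... | dirent | ocfs2_dir_block_trailer ]
 *                                    ^-- ocfs2_dir_trailer_blk_off(sb)
 *
 * Both helpers above just compute that fixed offset; one takes a
 * buffer_head, the other a raw (blocksize, data) pair.
 */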
  103. /*
  104. * XXX: This is executed once on every dirent. We should consider optimizing
  105. * it.
  106. */
  107. static int ocfs2_skip_dir_trailer(struct inode *dir,
  108. struct ocfs2_dir_entry *de,
  109. unsigned long offset,
  110. unsigned long blklen)
  111. {
  112. unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
  113. if (!ocfs2_dir_has_trailer(dir))
  114. return 0;
  115. if (offset != toff)
  116. return 0;
  117. return 1;
  118. }
  119. static void ocfs2_init_dir_trailer(struct inode *inode,
  120. struct buffer_head *bh)
  121. {
  122. struct ocfs2_dir_block_trailer *trailer;
  123. trailer = ocfs2_trailer_from_bh(bh, inode->i_sb);
  124. strcpy(trailer->db_signature, OCFS2_DIR_TRAILER_SIGNATURE);
  125. trailer->db_compat_rec_len =
  126. cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
  127. trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
  128. trailer->db_blkno = cpu_to_le64(bh->b_blocknr);
  129. }
  130. void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
  131. {
  132. brelse(res->dl_dx_root_bh);
  133. brelse(res->dl_leaf_bh);
  134. brelse(res->dl_dx_leaf_bh);
  135. }
  136. static int ocfs2_dir_indexed(struct inode *inode)
  137. {
  138. if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INDEXED_DIR_FL)
  139. return 1;
  140. return 0;
  141. }
  142. static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root)
  143. {
  144. return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE;
  145. }
  146. /*
  147. * Hashing code adapted from ext3
  148. */
  149. #define DELTA 0x9E3779B9
  150. static void TEA_transform(__u32 buf[4], __u32 const in[])
  151. {
  152. __u32 sum = 0;
  153. __u32 b0 = buf[0], b1 = buf[1];
  154. __u32 a = in[0], b = in[1], c = in[2], d = in[3];
  155. int n = 16;
  156. do {
  157. sum += DELTA;
  158. b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
  159. b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
  160. } while (--n);
  161. buf[0] += b0;
  162. buf[1] += b1;
  163. }
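/*
 * This is the mixing step of the TEA block cipher: 16 rounds over the two
 * 32-bit halves (b0, b1), keyed by the four words in in[], with the TEA
 * schedule constant DELTA (0x9E3779B9) added to 'sum' each round. It is
 * used here purely as a hash primitive, not for encryption.
 */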
  164. static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
  165. {
  166. __u32 pad, val;
  167. int i;
  168. pad = (__u32)len | ((__u32)len << 8);
  169. pad |= pad << 16;
  170. val = pad;
  171. if (len > num*4)
  172. len = num * 4;
  173. for (i = 0; i < len; i++) {
  174. if ((i % 4) == 0)
  175. val = pad;
  176. val = msg[i] + (val << 8);
  177. if ((i % 4) == 3) {
  178. *buf++ = val;
  179. val = pad;
  180. num--;
  181. }
  182. }
  183. if (--num >= 0)
  184. *buf++ = val;
  185. while (--num >= 0)
  186. *buf++ = pad;
  187. }
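/*
 * str2hashbuf() packs up to num*4 bytes of the name into 32-bit words,
 * shifting each new byte into the low bits, and fills the rest with a pad
 * pattern made of the length repeated in every byte. Worked example: for
 * name "ab" (len 2) and num 4, pad = 0x02020202, so the output words are
 * 0x02026162 ('a' then 'b' shifted in over the pad) followed by three pad
 * words of 0x02020202.
 */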
  188. static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
  189. struct ocfs2_dx_hinfo *hinfo)
  190. {
  191. struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
  192. const char *p;
  193. __u32 in[8], buf[4];
  194. /*
  195. * XXX: Is this really necessary, if the index is never looked
  196. * at by readdir? Is a hash value of '0' a bad idea?
  197. */
  198. if ((len == 1 && !strncmp(".", name, 1)) ||
  199. (len == 2 && !strncmp("..", name, 2))) {
  200. buf[0] = buf[1] = 0;
  201. goto out;
  202. }
  203. #ifdef OCFS2_DEBUG_DX_DIRS
  204. /*
  205. * This makes it very easy to debug indexing problems. We
  206. * should never allow this to be selected without hand editing
  207. * this file though.
  208. */
  209. buf[0] = buf[1] = len;
  210. goto out;
  211. #endif
  212. memcpy(buf, osb->osb_dx_seed, sizeof(buf));
  213. p = name;
  214. while (len > 0) {
  215. str2hashbuf(p, len, in, 4);
  216. TEA_transform(buf, in);
  217. len -= 16;
  218. p += 16;
  219. }
  220. out:
  221. hinfo->major_hash = buf[0];
  222. hinfo->minor_hash = buf[1];
  223. }
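/*
 * Shape of the name hash: buf[] is seeded from osb->osb_dx_seed, then each
 * 16-byte chunk of the name is folded in with one TEA pass. buf[0] becomes
 * the major hash, which drives the extent-level lookup, and buf[1] the
 * minor hash, which later selects a block within the cluster. "." and ".."
 * are special-cased to a hash of 0.
 */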
  224. /*
  225. * bh passed here can be an inode block or a dir data block, depending
  226. * on the inode inline data flag.
  227. */
  228. static int ocfs2_check_dir_entry(struct inode * dir,
  229. struct ocfs2_dir_entry * de,
  230. struct buffer_head * bh,
  231. unsigned long offset)
  232. {
  233. const char *error_msg = NULL;
  234. const int rlen = le16_to_cpu(de->rec_len);
  235. if (rlen < OCFS2_DIR_REC_LEN(1))
  236. error_msg = "rec_len is smaller than minimal";
  237. else if (rlen % 4 != 0)
  238. error_msg = "rec_len % 4 != 0";
  239. else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
  240. error_msg = "rec_len is too small for name_len";
  241. else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
  242. error_msg = "directory entry across blocks";
  243. if (error_msg != NULL)
  244. mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
  245. "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
  246. (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
  247. offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
  248. de->name_len);
  249. return error_msg == NULL ? 1 : 0;
  250. }
  251. static inline int ocfs2_match(int len,
  252. const char * const name,
  253. struct ocfs2_dir_entry *de)
  254. {
  255. if (len != de->name_len)
  256. return 0;
  257. if (!de->inode)
  258. return 0;
  259. return !memcmp(name, de->name, len);
  260. }
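/*
 * Note that ocfs2_match() treats an entry with a zero inode as a
 * non-match: deleted entries keep their rec_len around as reusable free
 * space (see __ocfs2_delete_entry() below) but have de->inode cleared, so
 * they must never satisfy a name lookup.
 */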
  261. /*
  262. * Returns 0 if not found, -1 on failure, and 1 on success
  263. */
  264. static inline int ocfs2_search_dirblock(struct buffer_head *bh,
  265. struct inode *dir,
  266. const char *name, int namelen,
  267. unsigned long offset,
  268. char *first_de,
  269. unsigned int bytes,
  270. struct ocfs2_dir_entry **res_dir)
  271. {
  272. struct ocfs2_dir_entry *de;
  273. char *dlimit, *de_buf;
  274. int de_len;
  275. int ret = 0;
  276. mlog_entry_void();
  277. de_buf = first_de;
  278. dlimit = de_buf + bytes;
  279. while (de_buf < dlimit) {
  280. /* this code is executed quadratically often */
  281. /* do minimal checking `by hand' */
  282. de = (struct ocfs2_dir_entry *) de_buf;
  283. if (de_buf + namelen <= dlimit &&
  284. ocfs2_match(namelen, name, de)) {
  285. /* found a match - just to be sure, do a full check */
  286. if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
  287. ret = -1;
  288. goto bail;
  289. }
  290. *res_dir = de;
  291. ret = 1;
  292. goto bail;
  293. }
  294. /* prevent looping on a bad block */
  295. de_len = le16_to_cpu(de->rec_len);
  296. if (de_len <= 0) {
  297. ret = -1;
  298. goto bail;
  299. }
  300. de_buf += de_len;
  301. offset += de_len;
  302. }
  303. bail:
  304. mlog_exit(ret);
  305. return ret;
  306. }
  307. static struct buffer_head *ocfs2_find_entry_id(const char *name,
  308. int namelen,
  309. struct inode *dir,
  310. struct ocfs2_dir_entry **res_dir)
  311. {
  312. int ret, found;
  313. struct buffer_head *di_bh = NULL;
  314. struct ocfs2_dinode *di;
  315. struct ocfs2_inline_data *data;
  316. ret = ocfs2_read_inode_block(dir, &di_bh);
  317. if (ret) {
  318. mlog_errno(ret);
  319. goto out;
  320. }
  321. di = (struct ocfs2_dinode *)di_bh->b_data;
  322. data = &di->id2.i_data;
  323. found = ocfs2_search_dirblock(di_bh, dir, name, namelen, 0,
  324. data->id_data, i_size_read(dir), res_dir);
  325. if (found == 1)
  326. return di_bh;
  327. brelse(di_bh);
  328. out:
  329. return NULL;
  330. }
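/*
 * For inline-data directories the dirents live directly in the inode block
 * (di->id2.i_data.id_data), so the region searched is i_size bytes long
 * and the buffer_head handed back is the inode block itself.
 */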
  331. static int ocfs2_validate_dir_block(struct super_block *sb,
  332. struct buffer_head *bh)
  333. {
  334. int rc;
  335. struct ocfs2_dir_block_trailer *trailer =
  336. ocfs2_trailer_from_bh(bh, sb);
  337. /*
  338. * We don't validate dirents here, that's handled
  339. * in-place when the code walks them.
  340. */
  341. mlog(0, "Validating dirblock %llu\n",
  342. (unsigned long long)bh->b_blocknr);
  343. BUG_ON(!buffer_uptodate(bh));
  344. /*
  345. * If the ecc fails, we return the error but otherwise
  346. * leave the filesystem running. We know any error is
  347. * local to this block.
  348. *
  349. * Note that we are safe to call this even if the directory
  350. * doesn't have a trailer. Filesystems without metaecc will do
  351. * nothing, and filesystems with it will have one.
  352. */
  353. rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &trailer->db_check);
  354. if (rc)
  355. mlog(ML_ERROR, "Checksum failed for dirblock %llu\n",
  356. (unsigned long long)bh->b_blocknr);
  357. return rc;
  358. }
  359. /*
  360. * Validate a directory trailer.
  361. *
  362. * We check the trailer here rather than in ocfs2_validate_dir_block()
  363. * because that function doesn't have the inode to test.
  364. */
  365. static int ocfs2_check_dir_trailer(struct inode *dir, struct buffer_head *bh)
  366. {
  367. int rc = 0;
  368. struct ocfs2_dir_block_trailer *trailer;
  369. trailer = ocfs2_trailer_from_bh(bh, dir->i_sb);
  370. if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
  371. rc = -EINVAL;
  372. ocfs2_error(dir->i_sb,
  373. "Invalid dirblock #%llu: "
  374. "signature = %.*s\n",
  375. (unsigned long long)bh->b_blocknr, 7,
  376. trailer->db_signature);
  377. goto out;
  378. }
  379. if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) {
  380. rc = -EINVAL;
  381. ocfs2_error(dir->i_sb,
  382. "Directory block #%llu has an invalid "
  383. "db_blkno of %llu",
  384. (unsigned long long)bh->b_blocknr,
  385. (unsigned long long)le64_to_cpu(trailer->db_blkno));
  386. goto out;
  387. }
  388. if (le64_to_cpu(trailer->db_parent_dinode) !=
  389. OCFS2_I(dir)->ip_blkno) {
  390. rc = -EINVAL;
  391. ocfs2_error(dir->i_sb,
  392. "Directory block #%llu on dinode "
  393. "#%llu has an invalid parent_dinode "
  394. "of %llu",
  395. (unsigned long long)bh->b_blocknr,
  396. (unsigned long long)OCFS2_I(dir)->ip_blkno,
  397. (unsigned long long)le64_to_cpu(trailer->db_parent_dinode));
  398. goto out;
  399. }
  400. out:
  401. return rc;
  402. }
  403. /*
  404. * This function forces all errors to -EIO for consistency with its
  405. * predecessor, ocfs2_bread(). We haven't audited what returning the
  406. * real error codes would do to callers. We log the real codes with
  407. * mlog_errno() before we squash them.
  408. */
  409. static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
  410. struct buffer_head **bh, int flags)
  411. {
  412. int rc = 0;
  413. struct buffer_head *tmp = *bh;
  414. rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
  415. ocfs2_validate_dir_block);
  416. if (rc) {
  417. mlog_errno(rc);
  418. goto out;
  419. }
  420. if (!(flags & OCFS2_BH_READAHEAD) &&
  421. ocfs2_dir_has_trailer(inode)) {
  422. rc = ocfs2_check_dir_trailer(inode, tmp);
  423. if (rc) {
  424. if (!*bh)
  425. brelse(tmp);
  426. mlog_errno(rc);
  427. goto out;
  428. }
  429. }
  430. /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
  431. if (!*bh)
  432. *bh = tmp;
  433. out:
  434. return rc ? -EIO : 0;
  435. }
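/*
 * Read-path summary: ocfs2_read_virt_blocks() does the virtual-to-physical
 * translation and runs ocfs2_validate_dir_block() (the metaecc check) as
 * its validator; the structural trailer check is then done here.
 * Readahead requests skip the trailer check, presumably because the buffer
 * may not even be up to date yet by the time this returns.
 */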
  436. /*
  437. * Read the block at 'phys' which belongs to this directory
  438. * inode. This function does no virtual->physical block translation -
  439. * what's passed in is assumed to be a valid directory block.
  440. */
  441. static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
  442. struct buffer_head **bh)
  443. {
  444. int ret;
  445. struct buffer_head *tmp = *bh;
  446. ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block);
  447. if (ret) {
  448. mlog_errno(ret);
  449. goto out;
  450. }
  451. if (ocfs2_supports_dir_trailer(dir)) {
  452. ret = ocfs2_check_dir_trailer(dir, tmp);
  453. if (ret) {
  454. if (!*bh)
  455. brelse(tmp);
  456. mlog_errno(ret);
  457. goto out;
  458. }
  459. }
  460. if (!ret && !*bh)
  461. *bh = tmp;
  462. out:
  463. return ret;
  464. }
  465. static int ocfs2_validate_dx_root(struct super_block *sb,
  466. struct buffer_head *bh)
  467. {
  468. int ret;
  469. struct ocfs2_dx_root_block *dx_root;
  470. BUG_ON(!buffer_uptodate(bh));
  471. dx_root = (struct ocfs2_dx_root_block *) bh->b_data;
  472. ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_root->dr_check);
  473. if (ret) {
  474. mlog(ML_ERROR,
  475. "Checksum failed for dir index root block %llu\n",
  476. (unsigned long long)bh->b_blocknr);
  477. return ret;
  478. }
  479. if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) {
  480. ocfs2_error(sb,
  481. "Dir Index Root # %llu has bad signature %.*s",
  482. (unsigned long long)le64_to_cpu(dx_root->dr_blkno),
  483. 7, dx_root->dr_signature);
  484. return -EINVAL;
  485. }
  486. return 0;
  487. }
  488. static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
  489. struct buffer_head **dx_root_bh)
  490. {
  491. int ret;
  492. u64 blkno = le64_to_cpu(di->i_dx_root);
  493. struct buffer_head *tmp = *dx_root_bh;
  494. ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root);
  495. /* If ocfs2_read_block() got us a new bh, pass it up. */
  496. if (!ret && !*dx_root_bh)
  497. *dx_root_bh = tmp;
  498. return ret;
  499. }
  500. static int ocfs2_validate_dx_leaf(struct super_block *sb,
  501. struct buffer_head *bh)
  502. {
  503. int ret;
  504. struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)bh->b_data;
  505. BUG_ON(!buffer_uptodate(bh));
  506. ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_leaf->dl_check);
  507. if (ret) {
  508. mlog(ML_ERROR,
  509. "Checksum failed for dir index leaf block %llu\n",
  510. (unsigned long long)bh->b_blocknr);
  511. return ret;
  512. }
  513. if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) {
  514. ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s",
  515. 7, dx_leaf->dl_signature);
  516. return -EROFS;
  517. }
  518. return 0;
  519. }
  520. static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
  521. struct buffer_head **dx_leaf_bh)
  522. {
  523. int ret;
  524. struct buffer_head *tmp = *dx_leaf_bh;
  525. ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf);
  526. /* If ocfs2_read_block() got us a new bh, pass it up. */
  527. if (!ret && !*dx_leaf_bh)
  528. *dx_leaf_bh = tmp;
  529. return ret;
  530. }
  531. /*
  532. * Read a series of dx_leaf blocks. This expects all buffer_head
  533. * pointers to be NULL on function entry.
  534. */
  535. static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num,
  536. struct buffer_head **dx_leaf_bhs)
  537. {
  538. int ret;
  539. ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0,
  540. ocfs2_validate_dx_leaf);
  541. if (ret)
  542. mlog_errno(ret);
  543. return ret;
  544. }
  545. static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
  546. struct inode *dir,
  547. struct ocfs2_dir_entry **res_dir)
  548. {
  549. struct super_block *sb;
  550. struct buffer_head *bh_use[NAMEI_RA_SIZE];
  551. struct buffer_head *bh, *ret = NULL;
  552. unsigned long start, block, b;
  553. int ra_max = 0; /* Number of bh's in the readahead
  554. buffer, bh_use[] */
  555. int ra_ptr = 0; /* Current index into readahead
  556. buffer */
  557. int num = 0;
  558. int nblocks, i, err;
  559. mlog_entry_void();
  560. sb = dir->i_sb;
  561. nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
  562. start = OCFS2_I(dir)->ip_dir_start_lookup;
  563. if (start >= nblocks)
  564. start = 0;
  565. block = start;
  566. restart:
  567. do {
  568. /*
  569. * We deal with the read-ahead logic here.
  570. */
  571. if (ra_ptr >= ra_max) {
  572. /* Refill the readahead buffer */
  573. ra_ptr = 0;
  574. b = block;
  575. for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
  576. /*
  577. * Terminate if we reach the end of the
  578. * directory and must wrap, or if our
  579. * search has finished at this block.
  580. */
  581. if (b >= nblocks || (num && block == start)) {
  582. bh_use[ra_max] = NULL;
  583. break;
  584. }
  585. num++;
  586. bh = NULL;
  587. err = ocfs2_read_dir_block(dir, b++, &bh,
  588. OCFS2_BH_READAHEAD);
  589. bh_use[ra_max] = bh;
  590. }
  591. }
  592. if ((bh = bh_use[ra_ptr++]) == NULL)
  593. goto next;
  594. if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
  595. /* read error, skip block & hope for the best.
  596. * ocfs2_read_dir_block() has released the bh. */
  597. ocfs2_error(dir->i_sb, "reading directory %llu, "
  598. "offset %lu\n",
  599. (unsigned long long)OCFS2_I(dir)->ip_blkno,
  600. block);
  601. goto next;
  602. }
  603. i = ocfs2_search_dirblock(bh, dir, name, namelen,
  604. block << sb->s_blocksize_bits,
  605. bh->b_data, sb->s_blocksize,
  606. res_dir);
  607. if (i == 1) {
  608. OCFS2_I(dir)->ip_dir_start_lookup = block;
  609. ret = bh;
  610. goto cleanup_and_exit;
  611. } else {
  612. brelse(bh);
  613. if (i < 0)
  614. goto cleanup_and_exit;
  615. }
  616. next:
  617. if (++block >= nblocks)
  618. block = 0;
  619. } while (block != start);
  620. /*
  621. * If the directory has grown while we were searching, then
  622. * search the last part of the directory before giving up.
  623. */
  624. block = nblocks;
  625. nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
  626. if (block < nblocks) {
  627. start = 0;
  628. goto restart;
  629. }
  630. cleanup_and_exit:
  631. /* Clean up the read-ahead blocks */
  632. for (; ra_ptr < ra_max; ra_ptr++)
  633. brelse(bh_use[ra_ptr]);
  634. mlog_exit_ptr(ret);
  635. return ret;
  636. }
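/*
 * Search strategy sketch: start at ip_dir_start_lookup (a per-inode hint
 * recording where the previous lookup succeeded), scan forward with
 * wrap-around, and read ahead in batches of NAMEI_RA_SIZE (2 * 4 = 8)
 * blocks. If the directory grew while we were scanning, the tail that
 * appeared after 'nblocks' was sampled is searched once more before
 * giving up.
 */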
  637. static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
  638. struct ocfs2_extent_list *el,
  639. u32 major_hash,
  640. u32 *ret_cpos,
  641. u64 *ret_phys_blkno,
  642. unsigned int *ret_clen)
  643. {
  644. int ret = 0, i, found;
  645. struct buffer_head *eb_bh = NULL;
  646. struct ocfs2_extent_block *eb;
  647. struct ocfs2_extent_rec *rec = NULL;
  648. if (el->l_tree_depth) {
  649. ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh);
  650. if (ret) {
  651. mlog_errno(ret);
  652. goto out;
  653. }
  654. eb = (struct ocfs2_extent_block *) eb_bh->b_data;
  655. el = &eb->h_list;
  656. if (el->l_tree_depth) {
  657. ocfs2_error(inode->i_sb,
  658. "Inode %lu has non zero tree depth in "
  659. "btree tree block %llu\n", inode->i_ino,
  660. (unsigned long long)eb_bh->b_blocknr);
  661. ret = -EROFS;
  662. goto out;
  663. }
  664. }
  665. found = 0;
  666. for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
  667. rec = &el->l_recs[i];
  668. if (le32_to_cpu(rec->e_cpos) <= major_hash) {
  669. found = 1;
  670. break;
  671. }
  672. }
  673. if (!found) {
  674. ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
  675. "record (%u, %u, 0) in btree", inode->i_ino,
  676. le32_to_cpu(rec->e_cpos),
  677. ocfs2_rec_clusters(el, rec));
  678. ret = -EROFS;
  679. goto out;
  680. }
  681. if (ret_phys_blkno)
  682. *ret_phys_blkno = le64_to_cpu(rec->e_blkno);
  683. if (ret_cpos)
  684. *ret_cpos = le32_to_cpu(rec->e_cpos);
  685. if (ret_clen)
  686. *ret_clen = le16_to_cpu(rec->e_leaf_clusters);
  687. out:
  688. brelse(eb_bh);
  689. return ret;
  690. }
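/*
 * In the dx tree, extent records are keyed by hash value rather than by
 * file offset: e_cpos is the lowest major hash covered by a record. The
 * backwards scan above therefore finds the last record whose e_cpos is
 * <= major_hash, i.e. the cluster range that should contain this name.
 */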
  691. /*
  692. * Returns the block index, from the start of the cluster which this
  693. * hash belongs to.
  694. */
  695. static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
  696. u32 minor_hash)
  697. {
  698. return minor_hash & osb->osb_dx_mask;
  699. }
  700. static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
  701. struct ocfs2_dx_hinfo *hinfo)
  702. {
  703. return __ocfs2_dx_dir_hash_idx(osb, hinfo->minor_hash);
  704. }
  705. static int ocfs2_dx_dir_lookup(struct inode *inode,
  706. struct ocfs2_extent_list *el,
  707. struct ocfs2_dx_hinfo *hinfo,
  708. u32 *ret_cpos,
  709. u64 *ret_phys_blkno)
  710. {
  711. int ret = 0;
  712. unsigned int cend, uninitialized_var(clen);
  713. u32 uninitialized_var(cpos);
  714. u64 uninitialized_var(blkno);
  715. u32 name_hash = hinfo->major_hash;
  716. ret = ocfs2_dx_dir_lookup_rec(inode, el, name_hash, &cpos, &blkno,
  717. &clen);
  718. if (ret) {
  719. mlog_errno(ret);
  720. goto out;
  721. }
  722. cend = cpos + clen;
  723. if (name_hash >= cend) {
  724. /* We want the last cluster */
  725. blkno += ocfs2_clusters_to_blocks(inode->i_sb, clen - 1);
  726. cpos += clen - 1;
  727. } else {
  728. blkno += ocfs2_clusters_to_blocks(inode->i_sb,
  729. name_hash - cpos);
  730. cpos = name_hash;
  731. }
  732. /*
  733. * We now have the cluster which should hold our entry. To
  734. * find the exact block from the start of the cluster to
  735. * search, we take the lower bits of the hash.
  736. */
  737. blkno += ocfs2_dx_dir_hash_idx(OCFS2_SB(inode->i_sb), hinfo);
  738. if (ret_phys_blkno)
  739. *ret_phys_blkno = blkno;
  740. if (ret_cpos)
  741. *ret_cpos = cpos;
  742. out:
  743. return ret;
  744. }
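/*
 * Worked example (illustrative only, sizes assumed): with 4K blocks and
 * 32K clusters there are 8 blocks per cluster, so osb_dx_mask would
 * presumably be 0x7. If the extent record found above starts at cpos
 * 0x1000 and is 4 clusters long, a major hash of 0x1002 lands two
 * clusters into the record (its third cluster), and minor_hash & 0x7 then
 * selects one of that cluster's 8 blocks.
 */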
  745. static int ocfs2_dx_dir_search(const char *name, int namelen,
  746. struct inode *dir,
  747. struct ocfs2_dx_root_block *dx_root,
  748. struct ocfs2_dir_lookup_result *res)
  749. {
  750. int ret, i, found;
  751. u64 uninitialized_var(phys);
  752. struct buffer_head *dx_leaf_bh = NULL;
  753. struct ocfs2_dx_leaf *dx_leaf;
  754. struct ocfs2_dx_entry *dx_entry = NULL;
  755. struct buffer_head *dir_ent_bh = NULL;
  756. struct ocfs2_dir_entry *dir_ent = NULL;
  757. struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo;
  758. struct ocfs2_extent_list *dr_el;
  759. struct ocfs2_dx_entry_list *entry_list;
  760. ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo);
  761. if (ocfs2_dx_root_inline(dx_root)) {
  762. entry_list = &dx_root->dr_entries;
  763. goto search;
  764. }
  765. dr_el = &dx_root->dr_list;
  766. ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys);
  767. if (ret) {
  768. mlog_errno(ret);
  769. goto out;
  770. }
  771. mlog(0, "Dir %llu: name: \"%.*s\", lookup of hash: %u.0x%x "
  772. "returns: %llu\n",
  773. (unsigned long long)OCFS2_I(dir)->ip_blkno,
  774. namelen, name, hinfo->major_hash, hinfo->minor_hash,
  775. (unsigned long long)phys);
  776. ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh);
  777. if (ret) {
  778. mlog_errno(ret);
  779. goto out;
  780. }
  781. dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data;
  782. mlog(0, "leaf info: num_used: %d, count: %d\n",
  783. le16_to_cpu(dx_leaf->dl_list.de_num_used),
  784. le16_to_cpu(dx_leaf->dl_list.de_count));
  785. entry_list = &dx_leaf->dl_list;
  786. search:
  787. /*
  788. * Empty leaf is legal, so no need to check for that.
  789. */
  790. found = 0;
  791. for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
  792. dx_entry = &entry_list->de_entries[i];
  793. if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash)
  794. || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash))
  795. continue;
  796. /*
  797. * Search unindexed leaf block now. We're not
  798. * guaranteed to find anything.
  799. */
  800. ret = ocfs2_read_dir_block_direct(dir,
  801. le64_to_cpu(dx_entry->dx_dirent_blk),
  802. &dir_ent_bh);
  803. if (ret) {
  804. mlog_errno(ret);
  805. goto out;
  806. }
  807. /*
  808. * XXX: We should check the unindexed block here,
  809. * before using it.
  810. */
  811. found = ocfs2_search_dirblock(dir_ent_bh, dir, name, namelen,
  812. 0, dir_ent_bh->b_data,
  813. dir->i_sb->s_blocksize, &dir_ent);
  814. if (found == 1)
  815. break;
  816. if (found == -1) {
  817. /* This means we found a bad directory entry. */
  818. ret = -EIO;
  819. mlog_errno(ret);
  820. goto out;
  821. }
  822. brelse(dir_ent_bh);
  823. dir_ent_bh = NULL;
  824. }
  825. if (found <= 0) {
  826. ret = -ENOENT;
  827. goto out;
  828. }
  829. res->dl_leaf_bh = dir_ent_bh;
  830. res->dl_entry = dir_ent;
  831. res->dl_dx_leaf_bh = dx_leaf_bh;
  832. res->dl_dx_entry = dx_entry;
  833. ret = 0;
  834. out:
  835. if (ret) {
  836. brelse(dx_leaf_bh);
  837. brelse(dir_ent_bh);
  838. }
  839. return ret;
  840. }
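/*
 * Two-stage search: the (major, minor) hash pair narrows things down to a
 * dx leaf (or the inline dr_entries list), and each matching dx_entry then
 * points at an unindexed directory block that still has to be scanned by
 * name, since different names can share a hash value.
 */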
  841. static int ocfs2_find_entry_dx(const char *name, int namelen,
  842. struct inode *dir,
  843. struct ocfs2_dir_lookup_result *lookup)
  844. {
  845. int ret;
  846. struct buffer_head *di_bh = NULL;
  847. struct ocfs2_dinode *di;
  848. struct buffer_head *dx_root_bh = NULL;
  849. struct ocfs2_dx_root_block *dx_root;
  850. ret = ocfs2_read_inode_block(dir, &di_bh);
  851. if (ret) {
  852. mlog_errno(ret);
  853. goto out;
  854. }
  855. di = (struct ocfs2_dinode *)di_bh->b_data;
  856. ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
  857. if (ret) {
  858. mlog_errno(ret);
  859. goto out;
  860. }
  861. dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
  862. ret = ocfs2_dx_dir_search(name, namelen, dir, dx_root, lookup);
  863. if (ret) {
  864. if (ret != -ENOENT)
  865. mlog_errno(ret);
  866. goto out;
  867. }
  868. lookup->dl_dx_root_bh = dx_root_bh;
  869. dx_root_bh = NULL;
  870. out:
  871. brelse(di_bh);
  872. brelse(dx_root_bh);
  873. return ret;
  874. }
  875. /*
  876. * Try to find an entry of the provided name within 'dir'.
  877. *
  878. * If nothing was found, -ENOENT is returned. Otherwise, zero is
  879. * returned and the struct 'res' will contain information useful to
  880. * other directory manipulation functions.
  881. *
  882. * Caller can NOT assume anything about the contents of the
  883. * buffer_heads - they are passed back only so that they can be passed
  884. * into any one of the manipulation functions (add entry, delete
  885. * entry, etc). As an example, bh in the extent directory case is a
  886. * data block, in the inline-data case it actually points to an inode,
  887. * in the indexed directory case, multiple buffers are involved.
  888. */
  889. int ocfs2_find_entry(const char *name, int namelen,
  890. struct inode *dir, struct ocfs2_dir_lookup_result *lookup)
  891. {
  892. struct buffer_head *bh;
  893. struct ocfs2_dir_entry *res_dir = NULL;
  894. if (ocfs2_dir_indexed(dir))
  895. return ocfs2_find_entry_dx(name, namelen, dir, lookup);
  896. /*
  897. * The unindexed dir code only uses part of the lookup
  898. * structure, so there's no reason to push it down further
  899. * than this.
  900. */
  901. if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
  902. bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir);
  903. else
  904. bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir);
  905. if (bh == NULL)
  906. return -ENOENT;
  907. lookup->dl_leaf_bh = bh;
  908. lookup->dl_entry = res_dir;
  909. return 0;
  910. }
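/*
 * Typical caller pattern (sketch only; variable names are illustrative):
 *
 *	struct ocfs2_dir_lookup_result lookup = { NULL, };
 *
 *	ret = ocfs2_find_entry(name, namelen, dir, &lookup);
 *	if (ret == 0) {
 *		... use lookup.dl_entry and the returned buffers ...
 *		ocfs2_free_dir_lookup_result(&lookup);
 *	}
 */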
  911. /*
  912. * Update inode number and type of a previously found directory entry.
  913. */
  914. int ocfs2_update_entry(struct inode *dir, handle_t *handle,
  915. struct ocfs2_dir_lookup_result *res,
  916. struct inode *new_entry_inode)
  917. {
  918. int ret;
  919. ocfs2_journal_access_func access = ocfs2_journal_access_db;
  920. struct ocfs2_dir_entry *de = res->dl_entry;
  921. struct buffer_head *de_bh = res->dl_leaf_bh;
  922. /*
  923. * The same code works fine for both inline-data and extent
  924. * based directories, so no need to split this up. The only
  925. * difference is the journal_access function.
  926. */
  927. if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
  928. access = ocfs2_journal_access_di;
  929. ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE);
  930. if (ret) {
  931. mlog_errno(ret);
  932. goto out;
  933. }
  934. de->inode = cpu_to_le64(OCFS2_I(new_entry_inode)->ip_blkno);
  935. ocfs2_set_de_type(de, new_entry_inode->i_mode);
  936. ocfs2_journal_dirty(handle, de_bh);
  937. out:
  938. return ret;
  939. }
  940. /*
  941. * __ocfs2_delete_entry deletes a directory entry by merging it with the
  942. * previous entry
  943. */
  944. static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
  945. struct ocfs2_dir_entry *de_del,
  946. struct buffer_head *bh, char *first_de,
  947. unsigned int bytes)
  948. {
  949. struct ocfs2_dir_entry *de, *pde;
  950. int i, status = -ENOENT;
  951. ocfs2_journal_access_func access = ocfs2_journal_access_db;
  952. mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
  953. if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
  954. access = ocfs2_journal_access_di;
  955. i = 0;
  956. pde = NULL;
  957. de = (struct ocfs2_dir_entry *) first_de;
  958. while (i < bytes) {
  959. if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
  960. status = -EIO;
  961. mlog_errno(status);
  962. goto bail;
  963. }
  964. if (de == de_del) {
  965. status = access(handle, dir, bh,
  966. OCFS2_JOURNAL_ACCESS_WRITE);
  967. if (status < 0) {
  968. status = -EIO;
  969. mlog_errno(status);
  970. goto bail;
  971. }
  972. if (pde)
  973. le16_add_cpu(&pde->rec_len,
  974. le16_to_cpu(de->rec_len));
  975. else
  976. de->inode = 0;
  977. dir->i_version++;
  978. status = ocfs2_journal_dirty(handle, bh);
  979. goto bail;
  980. }
  981. i += le16_to_cpu(de->rec_len);
  982. pde = de;
  983. de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
  984. }
  985. bail:
  986. mlog_exit(status);
  987. return status;
  988. }
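/*
 * Deletion never compacts the block: the victim's rec_len is folded into
 * the previous entry, turning it into reusable free space, or, when the
 * victim is the first entry in the region, de->inode is simply cleared.
 * The i_version bump lets readdir-style iteration notice that the
 * directory changed underneath it.
 */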
  989. static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list,
  990. int index)
  991. {
  992. int num_used = le16_to_cpu(entry_list->de_num_used);
  993. if (num_used == 1 || index == (num_used - 1))
  994. goto clear;
  995. memmove(&entry_list->de_entries[index],
  996. &entry_list->de_entries[index + 1],
  997. (num_used - index - 1)*sizeof(struct ocfs2_dx_entry));
  998. clear:
  999. num_used--;
  1000. memset(&entry_list->de_entries[num_used], 0,
  1001. sizeof(struct ocfs2_dx_entry));
  1002. entry_list->de_num_used = cpu_to_le16(num_used);
  1003. }
  1004. static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
  1005. struct ocfs2_dir_lookup_result *lookup)
  1006. {
  1007. int ret, index;
  1008. struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
  1009. struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
  1010. struct ocfs2_dx_leaf *dx_leaf;
  1011. struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry;
  1012. struct ocfs2_dx_root_block *dx_root;
  1013. struct ocfs2_dx_entry_list *entry_list;
  1014. dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
  1015. if (ocfs2_dx_root_inline(dx_root)) {
  1016. entry_list = &dx_root->dr_entries;
  1017. } else {
  1018. dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
  1019. entry_list = &dx_leaf->dl_list;
  1020. }
  1021. /* Neither of these are a disk corruption - that should have
  1022. * been caught by lookup, before we got here. */
  1023. BUG_ON(le16_to_cpu(entry_list->de_count) <= 0);
  1024. BUG_ON(le16_to_cpu(entry_list->de_num_used) <= 0);
  1025. index = (char *)dx_entry - (char *)entry_list->de_entries;
  1026. index /= sizeof(*dx_entry);
  1027. if (index >= le16_to_cpu(entry_list->de_num_used)) {
  1028. mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n",
  1029. (unsigned long long)OCFS2_I(dir)->ip_blkno, index,
  1030. entry_list, dx_entry);
  1031. return -EIO;
  1032. }
  1033. /*
  1034. * Add the block holding our index into the journal before
  1035. * removing the unindexed entry. If we get an error return
  1036. * from __ocfs2_delete_entry(), then it hasn't removed the
  1037. * entry yet. Likewise, successful return means we *must*
  1038. * remove the indexed entry.
  1039. *
  1040. * We're also careful to journal the root tree block here if
  1041. * we're going to be adding to the start of the free list.
  1042. */
  1043. if (ocfs2_dx_root_inline(dx_root)) {
  1044. ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
  1045. OCFS2_JOURNAL_ACCESS_WRITE);
  1046. if (ret) {
  1047. mlog_errno(ret);
  1048. goto out;
  1049. }
  1050. } else {
  1051. ret = ocfs2_journal_access_dl(handle, dir,
  1052. lookup->dl_dx_leaf_bh,
  1053. OCFS2_JOURNAL_ACCESS_WRITE);
  1054. if (ret) {
  1055. mlog_errno(ret);
  1056. goto out;
  1057. }
  1058. }
  1059. mlog(0, "Dir %llu: delete entry at index: %d\n",
  1060. (unsigned long long)OCFS2_I(dir)->ip_blkno, index);
  1061. ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry,
  1062. leaf_bh, leaf_bh->b_data, leaf_bh->b_size);
  1063. if (ret) {
  1064. mlog_errno(ret);
  1065. goto out;
  1066. }
  1067. ocfs2_dx_list_remove_entry(entry_list, index);
  1068. if (ocfs2_dx_root_inline(dx_root))
  1069. ocfs2_journal_dirty(handle, dx_root_bh);
  1070. else
  1071. ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh);
  1072. out:
  1073. return ret;
  1074. }
  1075. static inline int ocfs2_delete_entry_id(handle_t *handle,
  1076. struct inode *dir,
  1077. struct ocfs2_dir_entry *de_del,
  1078. struct buffer_head *bh)
  1079. {
  1080. int ret;
  1081. struct buffer_head *di_bh = NULL;
  1082. struct ocfs2_dinode *di;
  1083. struct ocfs2_inline_data *data;
  1084. ret = ocfs2_read_inode_block(dir, &di_bh);
  1085. if (ret) {
  1086. mlog_errno(ret);
  1087. goto out;
  1088. }
  1089. di = (struct ocfs2_dinode *)di_bh->b_data;
  1090. data = &di->id2.i_data;
  1091. ret = __ocfs2_delete_entry(handle, dir, de_del, bh, data->id_data,
  1092. i_size_read(dir));
  1093. brelse(di_bh);
  1094. out:
  1095. return ret;
  1096. }
  1097. static inline int ocfs2_delete_entry_el(handle_t *handle,
  1098. struct inode *dir,
  1099. struct ocfs2_dir_entry *de_del,
  1100. struct buffer_head *bh)
  1101. {
  1102. return __ocfs2_delete_entry(handle, dir, de_del, bh, bh->b_data,
  1103. bh->b_size);
  1104. }
  1105. /*
  1106. * Delete a directory entry. Hide the details of directory
  1107. * implementation from the caller.
  1108. */
  1109. int ocfs2_delete_entry(handle_t *handle,
  1110. struct inode *dir,
  1111. struct ocfs2_dir_lookup_result *res)
  1112. {
  1113. if (ocfs2_dir_indexed(dir))
  1114. return ocfs2_delete_entry_dx(handle, dir, res);
  1115. if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
  1116. return ocfs2_delete_entry_id(handle, dir, res->dl_entry,
  1117. res->dl_leaf_bh);
  1118. return ocfs2_delete_entry_el(handle, dir, res->dl_entry,
  1119. res->dl_leaf_bh);
  1120. }
  1121. /*
  1122. * Check whether 'de' has enough room to hold an entry of
  1123. * 'new_rec_len' bytes.
  1124. */
  1125. static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de,
  1126. unsigned int new_rec_len)
  1127. {
  1128. unsigned int de_really_used;
  1129. /* Check whether this is an empty record with enough space */
  1130. if (le64_to_cpu(de->inode) == 0 &&
  1131. le16_to_cpu(de->rec_len) >= new_rec_len)
  1132. return 1;
  1133. /*
  1134. * Record might have free space at the end which we can
  1135. * use.
  1136. */
  1137. de_really_used = OCFS2_DIR_REC_LEN(de->name_len);
  1138. if (le16_to_cpu(de->rec_len) >= (de_really_used + new_rec_len))
  1139. return 1;
  1140. return 0;
  1141. }
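/*
 * OCFS2_DIR_REC_LEN() (defined in ocfs2_fs.h) follows the ext2/3
 * convention: the fixed part of the dirent plus the name length, rounded
 * up to a multiple of 4. With the __le64 inode field used here the fixed
 * part works out to 12 bytes, so e.g. a 5-character name would need
 * 12 + 5 = 17 -> 20 bytes (illustrative; the macro is authoritative).
 * A live entry can host a new dirent only if its rec_len has that much
 * slack beyond its own real size; an empty entry (inode == 0) just needs
 * a big enough rec_len on its own.
 */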
  1142. static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf,
  1143. struct ocfs2_dx_entry *dx_new_entry)
  1144. {
  1145. int i;
  1146. i = le16_to_cpu(dx_leaf->dl_list.de_num_used);
  1147. dx_leaf->dl_list.de_entries[i] = *dx_new_entry;
  1148. le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
  1149. }
  1150. static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list,
  1151. struct ocfs2_dx_hinfo *hinfo,
  1152. u64 dirent_blk)
  1153. {
  1154. int i;
  1155. struct ocfs2_dx_entry *dx_entry;
  1156. i = le16_to_cpu(entry_list->de_num_used);
  1157. dx_entry = &entry_list->de_entries[i];
  1158. memset(dx_entry, 0, sizeof(*dx_entry));
  1159. dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash);
  1160. dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);
  1161. dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);
  1162. le16_add_cpu(&entry_list->de_num_used, 1);
  1163. }
  1164. static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
  1165. struct ocfs2_dx_hinfo *hinfo,
  1166. u64 dirent_blk,
  1167. struct buffer_head *dx_leaf_bh)
  1168. {
  1169. int ret;
  1170. struct ocfs2_dx_leaf *dx_leaf;
  1171. ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
  1172. OCFS2_JOURNAL_ACCESS_WRITE);
  1173. if (ret) {
  1174. mlog_errno(ret);
  1175. goto out;
  1176. }
  1177. dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
  1178. ocfs2_dx_entry_list_insert(&dx_leaf->dl_list, hinfo, dirent_blk);
  1179. ocfs2_journal_dirty(handle, dx_leaf_bh);
  1180. out:
  1181. return ret;
  1182. }
  1183. static int ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle,
  1184. struct ocfs2_dx_hinfo *hinfo,
  1185. u64 dirent_blk,
  1186. struct buffer_head *dx_root_bh)
  1187. {
  1188. int ret;
  1189. struct ocfs2_dx_root_block *dx_root;
  1190. ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
  1191. OCFS2_JOURNAL_ACCESS_WRITE);
  1192. if (ret) {
  1193. mlog_errno(ret);
  1194. goto out;
  1195. }
  1196. dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
  1197. ocfs2_dx_entry_list_insert(&dx_root->dr_entries, hinfo, dirent_blk);
  1198. ocfs2_journal_dirty(handle, dx_root_bh);
  1199. out:
  1200. return ret;
  1201. }
  1202. static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
  1203. struct ocfs2_dir_lookup_result *lookup)
  1204. {
  1205. struct ocfs2_dx_root_block *dx_root;
  1206. dx_root = (struct ocfs2_dx_root_block *)lookup->dl_dx_root_bh->b_data;
  1207. if (ocfs2_dx_root_inline(dx_root))
  1208. return ocfs2_dx_inline_root_insert(dir, handle,
  1209. &lookup->dl_hinfo,
  1210. lookup->dl_leaf_bh->b_blocknr,
  1211. lookup->dl_dx_root_bh);
  1212. return __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo,
  1213. lookup->dl_leaf_bh->b_blocknr,
  1214. lookup->dl_dx_leaf_bh);
  1215. }
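/*
 * Insert side of the index: the new name's (major, minor) hash and the
 * block number of the unindexed leaf that receives the dirent are recorded
 * either in the inline dr_entries list or in the dx leaf that the prepare/
 * lookup step left pinned in lookup->dl_dx_leaf_bh.
 */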
  1216. /* we don't always have a dentry for what we want to add, so people
  1217. * like orphan dir can call this instead.
  1218. *
  1219. * The lookup context must have been filled from
  1220. * ocfs2_prepare_dir_for_insert.
  1221. */
  1222. int __ocfs2_add_entry(handle_t *handle,
  1223. struct inode *dir,
  1224. const char *name, int namelen,
  1225. struct inode *inode, u64 blkno,
  1226. struct buffer_head *parent_fe_bh,
  1227. struct ocfs2_dir_lookup_result *lookup)
  1228. {
  1229. unsigned long offset;
  1230. unsigned short rec_len;
  1231. struct ocfs2_dir_entry *de, *de1;
  1232. struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_fe_bh->b_data;
  1233. struct super_block *sb = dir->i_sb;
  1234. int retval, status;
  1235. unsigned int size = sb->s_blocksize;
  1236. struct buffer_head *insert_bh = lookup->dl_leaf_bh;
  1237. char *data_start = insert_bh->b_data;
  1238. mlog_entry_void();
  1239. if (!namelen)
  1240. return -EINVAL;
  1241. if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
  1242. data_start = di->id2.i_data.id_data;
  1243. size = i_size_read(dir);
  1244. BUG_ON(insert_bh != parent_fe_bh);
  1245. }
  1246. rec_len = OCFS2_DIR_REC_LEN(namelen);
  1247. offset = 0;
  1248. de = (struct ocfs2_dir_entry *) data_start;
  1249. while (1) {
  1250. BUG_ON((char *)de >= (size + data_start));
  1251. /* These checks should've already been passed by the
  1252. * prepare function, but I guess we can leave them
  1253. * here anyway. */
  1254. if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
  1255. retval = -ENOENT;
  1256. goto bail;
  1257. }
  1258. if (ocfs2_match(namelen, name, de)) {
  1259. retval = -EEXIST;
  1260. goto bail;
  1261. }
  1262. /* We're guaranteed that we should have space, so we
  1263. * can't possibly have hit the trailer...right? */
  1264. mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size),
  1265. "Hit dir trailer trying to insert %.*s "
  1266. "(namelen %d) into directory %llu. "
  1267. "offset is %lu, trailer offset is %d\n",
  1268. namelen, name, namelen,
  1269. (unsigned long long)parent_fe_bh->b_blocknr,
  1270. offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
  1271. if (ocfs2_dirent_would_fit(de, rec_len)) {
  1272. dir->i_mtime = dir->i_ctime = CURRENT_TIME;
  1273. retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
  1274. if (retval < 0) {
  1275. mlog_errno(retval);
  1276. goto bail;
  1277. }
  1278. if (insert_bh == parent_fe_bh)
  1279. status = ocfs2_journal_access_di(handle, dir,
  1280. insert_bh,
  1281. OCFS2_JOURNAL_ACCESS_WRITE);
  1282. else {
  1283. status = ocfs2_journal_access_db(handle, dir,
  1284. insert_bh,
  1285. OCFS2_JOURNAL_ACCESS_WRITE);
  1286. if (ocfs2_dir_indexed(dir)) {
  1287. status = ocfs2_dx_dir_insert(dir,
  1288. handle,
  1289. lookup);
  1290. if (status) {
  1291. mlog_errno(status);
  1292. goto bail;
  1293. }
  1294. }
  1295. }
  1296. /* By now the buffer is marked for journaling */
  1297. offset += le16_to_cpu(de->rec_len);
  1298. if (le64_to_cpu(de->inode)) {
  1299. de1 = (struct ocfs2_dir_entry *)((char *) de +
  1300. OCFS2_DIR_REC_LEN(de->name_len));
  1301. de1->rec_len =
  1302. cpu_to_le16(le16_to_cpu(de->rec_len) -
  1303. OCFS2_DIR_REC_LEN(de->name_len));
  1304. de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
  1305. de = de1;
  1306. }
  1307. de->file_type = OCFS2_FT_UNKNOWN;
  1308. if (blkno) {
  1309. de->inode = cpu_to_le64(blkno);
  1310. ocfs2_set_de_type(de, inode->i_mode);
  1311. } else
  1312. de->inode = 0;
  1313. de->name_len = namelen;
  1314. memcpy(de->name, name, namelen);
  1315. dir->i_version++;
  1316. status = ocfs2_journal_dirty(handle, insert_bh);
  1317. retval = 0;
  1318. goto bail;
  1319. }
  1320. offset += le16_to_cpu(de->rec_len);
  1321. de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
  1322. }
  1323. /* when you think about it, the assert above should prevent us
  1324. * from ever getting here. */
  1325. retval = -ENOSPC;
  1326. bail:
  1327. mlog_exit(retval);
  1328. return retval;
  1329. }
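/*
 * readdir helper for inline-data directories - the dirents are walked
 * straight out of the inode block.
 */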
  1330. static int ocfs2_dir_foreach_blk_id(struct inode *inode,
  1331. u64 *f_version,
  1332. loff_t *f_pos, void *priv,
  1333. filldir_t filldir, int *filldir_err)
  1334. {
  1335. int ret, i, filldir_ret;
  1336. unsigned long offset = *f_pos;
  1337. struct buffer_head *di_bh = NULL;
  1338. struct ocfs2_dinode *di;
  1339. struct ocfs2_inline_data *data;
  1340. struct ocfs2_dir_entry *de;
  1341. ret = ocfs2_read_inode_block(inode, &di_bh);
  1342. if (ret) {
  1343. mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
  1344. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  1345. goto out;
  1346. }
  1347. di = (struct ocfs2_dinode *)di_bh->b_data;
  1348. data = &di->id2.i_data;
  1349. while (*f_pos < i_size_read(inode)) {
  1350. revalidate:
  1351. /* If the dir block has changed since the last call to
  1352. * readdir(2), then we might be pointing to an invalid
  1353. * dirent right now. Scan from the start of the block
  1354. * to make sure. */
  1355. if (*f_version != inode->i_version) {
  1356. for (i = 0; i < i_size_read(inode) && i < offset; ) {
  1357. de = (struct ocfs2_dir_entry *)
  1358. (data->id_data + i);
  1359. /* It's too expensive to do a full
  1360. * dirent test each time round this
  1361. * loop, but we do have to test at
  1362. * least that it is non-zero. A
  1363. * failure will be detected in the
  1364. * dirent test below. */
  1365. if (le16_to_cpu(de->rec_len) <
  1366. OCFS2_DIR_REC_LEN(1))
  1367. break;
  1368. i += le16_to_cpu(de->rec_len);
  1369. }
  1370. *f_pos = offset = i;
  1371. *f_version = inode->i_version;
  1372. }
  1373. de = (struct ocfs2_dir_entry *) (data->id_data + *f_pos);
  1374. if (!ocfs2_check_dir_entry(inode, de, di_bh, *f_pos)) {
  1375. /* On error, skip the f_pos to the end. */
  1376. *f_pos = i_size_read(inode);
  1377. goto out;
  1378. }
  1379. offset += le16_to_cpu(de->rec_len);
  1380. if (le64_to_cpu(de->inode)) {
  1381. /* We might block in the next section
  1382. * if the data destination is
  1383. * currently swapped out. So, use a
  1384. * version stamp to detect whether or
  1385. * not the directory has been modified
  1386. * during the copy operation.
  1387. */
  1388. u64 version = *f_version;
  1389. unsigned char d_type = DT_UNKNOWN;
  1390. if (de->file_type < OCFS2_FT_MAX)
  1391. d_type = ocfs2_filetype_table[de->file_type];
  1392. filldir_ret = filldir(priv, de->name,
  1393. de->name_len,
  1394. *f_pos,
  1395. le64_to_cpu(de->inode),
  1396. d_type);
  1397. if (filldir_ret) {
  1398. if (filldir_err)
  1399. *filldir_err = filldir_ret;
  1400. break;
  1401. }
  1402. if (version != *f_version)
  1403. goto revalidate;
  1404. }
  1405. *f_pos += le16_to_cpu(de->rec_len);
  1406. }
  1407. out:
  1408. brelse(di_bh);
  1409. return 0;
  1410. }
  1411. /*
  1412. * NOTE: This function can be called against unindexed directories,
  1413. * and indexed ones.
  1414. */
  1415. static int ocfs2_dir_foreach_blk_el(struct inode *inode,
  1416. u64 *f_version,
  1417. loff_t *f_pos, void *priv,
  1418. filldir_t filldir, int *filldir_err)
  1419. {
  1420. int error = 0;
  1421. unsigned long offset, blk, last_ra_blk = 0;
  1422. int i, stored;
  1423. struct buffer_head * bh, * tmp;
  1424. struct ocfs2_dir_entry * de;
  1425. struct super_block * sb = inode->i_sb;
  1426. unsigned int ra_sectors = 16;
  1427. stored = 0;
  1428. bh = NULL;
  1429. offset = (*f_pos) & (sb->s_blocksize - 1);
  1430. while (!error && !stored && *f_pos < i_size_read(inode)) {
  1431. blk = (*f_pos) >> sb->s_blocksize_bits;
  1432. if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
  1433. /* Skip the corrupt dirblock and keep trying */
  1434. *f_pos += sb->s_blocksize - offset;
  1435. continue;
  1436. }
  1437. /* The idea here is to begin with 8k read-ahead and to stay
  1438. * 4k ahead of our current position.
  1439. *
  1440. * TODO: Use the pagecache for this. We just need to
  1441. * make sure it's cluster-safe... */
  1442. if (!last_ra_blk
  1443. || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
  1444. for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
  1445. i > 0; i--) {
  1446. tmp = NULL;
  1447. if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
  1448. OCFS2_BH_READAHEAD))
  1449. brelse(tmp);
  1450. }
  1451. last_ra_blk = blk;
  1452. ra_sectors = 8;
  1453. }
  1454. revalidate:
  1455. /* If the dir block has changed since the last call to
  1456. * readdir(2), then we might be pointing to an invalid
  1457. * dirent right now. Scan from the start of the block
  1458. * to make sure. */
  1459. if (*f_version != inode->i_version) {
  1460. for (i = 0; i < sb->s_blocksize && i < offset; ) {
  1461. de = (struct ocfs2_dir_entry *) (bh->b_data + i);
  1462. /* It's too expensive to do a full
  1463. * dirent test each time round this
  1464. * loop, but we do have to test at
  1465. * least that it is non-zero. A
  1466. * failure will be detected in the
  1467. * dirent test below. */
  1468. if (le16_to_cpu(de->rec_len) <
  1469. OCFS2_DIR_REC_LEN(1))
  1470. break;
  1471. i += le16_to_cpu(de->rec_len);
  1472. }
  1473. offset = i;
  1474. *f_pos = ((*f_pos) & ~(sb->s_blocksize - 1))
  1475. | offset;
  1476. *f_version = inode->i_version;
  1477. }
  1478. while (!error && *f_pos < i_size_read(inode)
  1479. && offset < sb->s_blocksize) {
  1480. de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
  1481. if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
  1482. /* On error, skip the f_pos to the
  1483. next block. */
  1484. *f_pos = ((*f_pos) | (sb->s_blocksize - 1)) + 1;
  1485. brelse(bh);
  1486. goto out;
  1487. }
  1488. offset += le16_to_cpu(de->rec_len);
  1489. if (le64_to_cpu(de->inode)) {
  1490. /* We might block in the next section
  1491. * if the data destination is
  1492. * currently swapped out. So, use a
  1493. * version stamp to detect whether or
  1494. * not the directory has been modified
  1495. * during the copy operation.
  1496. */
  1497. unsigned long version = *f_version;
  1498. unsigned char d_type = DT_UNKNOWN;
  1499. if (de->file_type < OCFS2_FT_MAX)
  1500. d_type = ocfs2_filetype_table[de->file_type];
  1501. error = filldir(priv, de->name,
  1502. de->name_len,
  1503. *f_pos,
  1504. le64_to_cpu(de->inode),
  1505. d_type);
  1506. if (error) {
  1507. if (filldir_err)
  1508. *filldir_err = error;
  1509. break;
  1510. }
  1511. if (version != *f_version)
  1512. goto revalidate;
  1513. stored ++;
  1514. }
  1515. *f_pos += le16_to_cpu(de->rec_len);
  1516. }
  1517. offset = 0;
  1518. brelse(bh);
  1519. bh = NULL;
  1520. }
  1521. stored = 0;
  1522. out:
  1523. return stored;
  1524. }
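/*
 * Dispatch to the inline-data or extent-list readdir helper, depending
 * on how the directory data is currently stored.
 */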
  1525. static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version,
  1526. loff_t *f_pos, void *priv, filldir_t filldir,
  1527. int *filldir_err)
  1528. {
  1529. if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
  1530. return ocfs2_dir_foreach_blk_id(inode, f_version, f_pos, priv,
  1531. filldir, filldir_err);
  1532. return ocfs2_dir_foreach_blk_el(inode, f_version, f_pos, priv, filldir,
  1533. filldir_err);
  1534. }
  1535. /*
  1536. * This is intended to be called from inside other kernel functions,
  1537. * so we fake some arguments.
  1538. */
  1539. int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
  1540. filldir_t filldir)
  1541. {
  1542. int ret = 0, filldir_err = 0;
  1543. u64 version = inode->i_version;
  1544. while (*f_pos < i_size_read(inode)) {
  1545. ret = ocfs2_dir_foreach_blk(inode, &version, f_pos, priv,
  1546. filldir, &filldir_err);
  1547. if (ret || filldir_err)
  1548. break;
  1549. }
  1550. if (ret > 0)
  1551. ret = -EIO;
  1552. return 0;
  1553. }
  1554. /*
  1555. * ocfs2_readdir()
  1556. *
  1557. */
  1558. int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
  1559. {
  1560. int error = 0;
  1561. struct inode *inode = filp->f_path.dentry->d_inode;
  1562. int lock_level = 0;
  1563. mlog_entry("dirino=%llu\n",
  1564. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  1565. error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
  1566. if (lock_level && error >= 0) {
  1567. /* We release EX lock which used to update atime
  1568. * and get PR lock again to reduce contention
  1569. * on commonly accessed directories. */
  1570. ocfs2_inode_unlock(inode, 1);
  1571. lock_level = 0;
  1572. error = ocfs2_inode_lock(inode, NULL, 0);
  1573. }
  1574. if (error < 0) {
  1575. if (error != -ENOENT)
  1576. mlog_errno(error);
  1577. /* we haven't got any yet, so propagate the error. */
  1578. goto bail_nolock;
  1579. }
  1580. error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos,
  1581. dirent, filldir, NULL);
  1582. ocfs2_inode_unlock(inode, lock_level);
  1583. bail_nolock:
  1584. mlog_exit(error);
  1585. return error;
  1586. }
  1587. /*
  1588. * NOTE: this should always be called with parent dir i_mutex taken.
  1589. */
  1590. int ocfs2_find_files_on_disk(const char *name,
  1591. int namelen,
  1592. u64 *blkno,
  1593. struct inode *inode,
  1594. struct ocfs2_dir_lookup_result *lookup)
  1595. {
  1596. int status = -ENOENT;
  1597. mlog(0, "name=%.*s, blkno=%p, inode=%llu\n", namelen, name, blkno,
  1598. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  1599. status = ocfs2_find_entry(name, namelen, inode, lookup);
  1600. if (status)
  1601. goto leave;
  1602. *blkno = le64_to_cpu(lookup->dl_entry->inode);
  1603. status = 0;
  1604. leave:
  1605. return status;
  1606. }
  1607. /*
  1608. * Convenience function for callers which just want the block number
  1609. * mapped to a name and don't require the full dirent info, etc.
  1610. */
  1611. int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
  1612. int namelen, u64 *blkno)
  1613. {
  1614. int ret;
  1615. struct ocfs2_dir_lookup_result lookup = { NULL, };
  1616. ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &lookup);
  1617. ocfs2_free_dir_lookup_result(&lookup);
  1618. return ret;
  1619. }
  1620. /* Check for a name within a directory.
  1621. *
  1622. * Return 0 if the name does not exist
  1623. * Return -EEXIST if the directory contains the name
  1624. *
  1625. * Callers should have i_mutex + a cluster lock on dir
  1626. */
  1627. int ocfs2_check_dir_for_entry(struct inode *dir,
  1628. const char *name,
  1629. int namelen)
  1630. {
  1631. int ret;
  1632. struct ocfs2_dir_lookup_result lookup = { NULL, };
  1633. mlog_entry("dir %llu, name '%.*s'\n",
  1634. (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
  1635. ret = -EEXIST;
  1636. if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0)
  1637. goto bail;
  1638. ret = 0;
  1639. bail:
  1640. ocfs2_free_dir_lookup_result(&lookup);
  1641. mlog_exit(ret);
  1642. return ret;
  1643. }
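/*
 * Accounting used by ocfs2_empty_dir(). The filldir callback below
 * notes whether "." and ".." were seen in their expected positions and
 * whether any other entry exists.
 */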
  1644. struct ocfs2_empty_dir_priv {
  1645. unsigned seen_dot;
  1646. unsigned seen_dot_dot;
  1647. unsigned seen_other;
  1648. };
  1649. static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
  1650. loff_t pos, u64 ino, unsigned type)
  1651. {
  1652. struct ocfs2_empty_dir_priv *p = priv;
  1653. /*
  1654. * Check the positions of "." and ".." records to be sure
  1655. * they're in the correct place.
  1656. */
  1657. if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) {
  1658. p->seen_dot = 1;
  1659. return 0;
  1660. }
  1661. if (name_len == 2 && !strncmp("..", name, 2) &&
  1662. pos == OCFS2_DIR_REC_LEN(1)) {
  1663. p->seen_dot_dot = 1;
  1664. return 0;
  1665. }
  1666. p->seen_other = 1;
  1667. return 1;
  1668. }
  1669. /*
  1670. * routine to check that the specified directory is empty (for rmdir)
  1671. *
  1672. * Returns 1 if dir is empty, zero otherwise.
  1673. *
  1674. * XXX: This is a performance problem
  1675. */
  1676. int ocfs2_empty_dir(struct inode *inode)
  1677. {
  1678. int ret;
  1679. loff_t start = 0;
  1680. struct ocfs2_empty_dir_priv priv;
  1681. memset(&priv, 0, sizeof(priv));
  1682. ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir);
  1683. if (ret)
  1684. mlog_errno(ret);
  1685. if (!priv.seen_dot || !priv.seen_dot_dot) {
  1686. mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
  1687. (unsigned long long)OCFS2_I(inode)->ip_blkno);
  1688. /*
  1689. * XXX: Is it really safe to allow an unlink to continue?
  1690. */
  1691. return 1;
  1692. }
  1693. return !priv.seen_other;
  1694. }
  1695. /*
  1696. * Fills "." and ".." dirents in a new directory block. Returns dirent for
  1697. * "..", which might be used during creation of a directory with a trailing
  1698. * header. It is otherwise safe to ignore the return code.
  1699. */
  1700. static struct ocfs2_dir_entry *ocfs2_fill_initial_dirents(struct inode *inode,
  1701. struct inode *parent,
  1702. char *start,
  1703. unsigned int size)
  1704. {
  1705. struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
  1706. de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
  1707. de->name_len = 1;
  1708. de->rec_len =
  1709. cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
  1710. strcpy(de->name, ".");
  1711. ocfs2_set_de_type(de, S_IFDIR);
  1712. de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
  1713. de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
  1714. de->rec_len = cpu_to_le16(size - OCFS2_DIR_REC_LEN(1));
  1715. de->name_len = 2;
  1716. strcpy(de->name, "..");
  1717. ocfs2_set_de_type(de, S_IFDIR);
  1718. return de;
  1719. }
  1720. /*
  1721. * This works together with code in ocfs2_mknod_locked() which sets
  1722. * the inline-data flag and initializes the inline-data section.
  1723. */
  1724. static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
  1725. handle_t *handle,
  1726. struct inode *parent,
  1727. struct inode *inode,
  1728. struct buffer_head *di_bh)
  1729. {
  1730. int ret;
  1731. struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
  1732. struct ocfs2_inline_data *data = &di->id2.i_data;
  1733. unsigned int size = le16_to_cpu(data->id_count);
  1734. ret = ocfs2_journal_access_di(handle, inode, di_bh,
  1735. OCFS2_JOURNAL_ACCESS_WRITE);
  1736. if (ret) {
  1737. mlog_errno(ret);
  1738. goto out;
  1739. }
        ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);

        ret = ocfs2_journal_dirty(handle, di_bh);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }
  1746. i_size_write(inode, size);
  1747. inode->i_nlink = 2;
  1748. inode->i_blocks = ocfs2_inode_sector_count(inode);
  1749. ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
  1750. if (ret < 0)
  1751. mlog_errno(ret);
  1752. out:
  1753. return ret;
  1754. }
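/*
 * Allocate and format the first block of a new extent-based directory:
 * extend the directory by one block, write the "." and ".." entries
 * and, if the filesystem supports it, the block trailer.
 */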
  1755. static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
  1756. handle_t *handle,
  1757. struct inode *parent,
  1758. struct inode *inode,
  1759. struct buffer_head *fe_bh,
  1760. struct ocfs2_alloc_context *data_ac,
  1761. struct buffer_head **ret_new_bh)
  1762. {
  1763. int status;
  1764. unsigned int size = osb->sb->s_blocksize;
  1765. struct buffer_head *new_bh = NULL;
  1766. struct ocfs2_dir_entry *de;
  1767. mlog_entry_void();
  1768. if (ocfs2_supports_dir_trailer(osb))
  1769. size = ocfs2_dir_trailer_blk_off(parent->i_sb);
  1770. status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
  1771. data_ac, NULL, &new_bh);
  1772. if (status < 0) {
  1773. mlog_errno(status);
  1774. goto bail;
  1775. }
  1776. ocfs2_set_new_buffer_uptodate(inode, new_bh);
  1777. status = ocfs2_journal_access_db(handle, inode, new_bh,
  1778. OCFS2_JOURNAL_ACCESS_CREATE);
  1779. if (status < 0) {
  1780. mlog_errno(status);
  1781. goto bail;
  1782. }
  1783. memset(new_bh->b_data, 0, osb->sb->s_blocksize);
  1784. de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size);
  1785. if (ocfs2_supports_dir_trailer(osb))
  1786. ocfs2_init_dir_trailer(inode, new_bh);
  1787. status = ocfs2_journal_dirty(handle, new_bh);
  1788. if (status < 0) {
  1789. mlog_errno(status);
  1790. goto bail;
  1791. }
  1792. i_size_write(inode, inode->i_sb->s_blocksize);
  1793. inode->i_nlink = 2;
  1794. inode->i_blocks = ocfs2_inode_sector_count(inode);
  1795. status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
  1796. if (status < 0) {
  1797. mlog_errno(status);
  1798. goto bail;
  1799. }
  1800. status = 0;
  1801. if (ret_new_bh) {
  1802. *ret_new_bh = new_bh;
  1803. new_bh = NULL;
  1804. }
  1805. bail:
  1806. brelse(new_bh);
  1807. mlog_exit(status);
  1808. return status;
  1809. }
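/*
 * Allocate a dx root block for 'dir', format it (either with an inline
 * entry list or an empty extent list, depending on dx_inline) and link
 * it from the directory inode.
 */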
  1810. static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
  1811. handle_t *handle, struct inode *dir,
  1812. struct buffer_head *di_bh,
  1813. struct ocfs2_alloc_context *meta_ac,
  1814. int dx_inline,
  1815. struct buffer_head **ret_dx_root_bh)
  1816. {
  1817. int ret;
  1818. struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
  1819. u16 dr_suballoc_bit;
  1820. u64 dr_blkno;
  1821. unsigned int num_bits;
  1822. struct buffer_head *dx_root_bh = NULL;
  1823. struct ocfs2_dx_root_block *dx_root;
  1824. ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit,
  1825. &num_bits, &dr_blkno);
  1826. if (ret) {
  1827. mlog_errno(ret);
  1828. goto out;
  1829. }
  1830. mlog(0, "Dir %llu, attach new index block: %llu\n",
  1831. (unsigned long long)OCFS2_I(dir)->ip_blkno,
  1832. (unsigned long long)dr_blkno);
  1833. dx_root_bh = sb_getblk(osb->sb, dr_blkno);
  1834. if (dx_root_bh == NULL) {
  1835. ret = -EIO;
  1836. goto out;
  1837. }
  1838. ocfs2_set_new_buffer_uptodate(dir, dx_root_bh);
  1839. ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
  1840. OCFS2_JOURNAL_ACCESS_CREATE);
  1841. if (ret < 0) {
  1842. mlog_errno(ret);
  1843. goto out;
  1844. }
  1845. dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
  1846. memset(dx_root, 0, osb->sb->s_blocksize);
  1847. strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
  1848. dx_root->dr_suballoc_slot = cpu_to_le16(osb->slot_num);
  1849. dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit);
  1850. dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
  1851. dx_root->dr_blkno = cpu_to_le64(dr_blkno);
  1852. dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);
  1853. if (dx_inline) {
  1854. dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE;
  1855. dx_root->dr_entries.de_count =
  1856. cpu_to_le16(ocfs2_dx_entries_per_root(osb->sb));
  1857. } else {
  1858. dx_root->dr_list.l_count =
  1859. cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
  1860. }
  1861. ret = ocfs2_journal_dirty(handle, dx_root_bh);
  1862. if (ret)
  1863. mlog_errno(ret);
  1864. ret = ocfs2_journal_access_di(handle, dir, di_bh,
  1865. OCFS2_JOURNAL_ACCESS_CREATE);
  1866. if (ret) {
  1867. mlog_errno(ret);
  1868. goto out;
  1869. }
  1870. di->i_dx_root = cpu_to_le64(dr_blkno);
  1871. OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
  1872. di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
  1873. ret = ocfs2_journal_dirty(handle, di_bh);
  1874. if (ret)
  1875. mlog_errno(ret);
  1876. *ret_dx_root_bh = dx_root_bh;
  1877. dx_root_bh = NULL;
  1878. out:
  1879. brelse(dx_root_bh);
  1880. return ret;
  1881. }
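/*
 * Initialize every block in a freshly allocated index cluster as an
 * empty dx leaf. The buffer heads are returned in dx_leaves for the
 * caller to use.
 */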
  1882. static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
  1883. handle_t *handle, struct inode *dir,
  1884. struct buffer_head **dx_leaves,
  1885. int num_dx_leaves, u64 start_blk)
  1886. {
  1887. int ret, i;
  1888. struct ocfs2_dx_leaf *dx_leaf;
  1889. struct buffer_head *bh;
  1890. for (i = 0; i < num_dx_leaves; i++) {
  1891. bh = sb_getblk(osb->sb, start_blk + i);
  1892. if (bh == NULL) {
  1893. ret = -EIO;
  1894. goto out;
  1895. }
  1896. dx_leaves[i] = bh;
  1897. ocfs2_set_new_buffer_uptodate(dir, bh);
  1898. ret = ocfs2_journal_access_dl(handle, dir, bh,
  1899. OCFS2_JOURNAL_ACCESS_CREATE);
  1900. if (ret < 0) {
  1901. mlog_errno(ret);
  1902. goto out;
  1903. }
  1904. dx_leaf = (struct ocfs2_dx_leaf *) bh->b_data;
  1905. memset(dx_leaf, 0, osb->sb->s_blocksize);
  1906. strcpy(dx_leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE);
  1907. dx_leaf->dl_fs_generation = cpu_to_le32(osb->fs_generation);
  1908. dx_leaf->dl_blkno = cpu_to_le64(bh->b_blocknr);
  1909. dx_leaf->dl_list.de_count =
  1910. cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb));
  1911. mlog(0,
  1912. "Dir %llu, format dx_leaf: %llu, entry count: %u\n",
  1913. (unsigned long long)OCFS2_I(dir)->ip_blkno,
  1914. (unsigned long long)bh->b_blocknr,
  1915. le16_to_cpu(dx_leaf->dl_list.de_count));
  1916. ocfs2_journal_dirty(handle, bh);
  1917. }
  1918. ret = 0;
  1919. out:
  1920. return ret;
  1921. }
  1922. /*
  1923. * Allocates and formats a new cluster for use in an indexed dir
  1924. * leaf. This version will not do the extent insert, so that it can be
  1925. * used by operations which need careful ordering.
  1926. */
  1927. static int __ocfs2_dx_dir_new_cluster(struct inode *dir,
  1928. u32 cpos, handle_t *handle,
  1929. struct ocfs2_alloc_context *data_ac,
  1930. struct buffer_head **dx_leaves,
  1931. int num_dx_leaves, u64 *ret_phys_blkno)
  1932. {
  1933. int ret;
  1934. u32 phys, num;
  1935. u64 phys_blkno;
  1936. struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
  1937. /*
  1938. * XXX: For create, this should claim cluster for the index
  1939. * *before* the unindexed insert so that we have a better
  1940. * chance of contiguousness as the directory grows in number
  1941. * of entries.
  1942. */
  1943. ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 1, &phys, &num);
  1944. if (ret) {
  1945. mlog_errno(ret);
  1946. goto out;
  1947. }
  1948. /*
  1949. * Format the new cluster first. That way, we're inserting
  1950. * valid data.
  1951. */
  1952. phys_blkno = ocfs2_clusters_to_blocks(osb->sb, phys);
  1953. ret = ocfs2_dx_dir_format_cluster(osb, handle, dir, dx_leaves,
  1954. num_dx_leaves, phys_blkno);
  1955. if (ret) {
  1956. mlog_errno(ret);
  1957. goto out;
  1958. }
  1959. *ret_phys_blkno = phys_blkno;
  1960. out:
  1961. return ret;
  1962. }
  1963. static int ocfs2_dx_dir_new_cluster(struct inode *dir,
  1964. struct ocfs2_extent_tree *et,
  1965. u32 cpos, handle_t *handle,
  1966. struct ocfs2_alloc_context *data_ac,
  1967. struct ocfs2_alloc_context *meta_ac,
  1968. struct buffer_head **dx_leaves,
  1969. int num_dx_leaves)
  1970. {
  1971. int ret;
  1972. u64 phys_blkno;
  1973. struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
  1974. ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves,
  1975. num_dx_leaves, &phys_blkno);
  1976. if (ret) {
  1977. mlog_errno(ret);
  1978. goto out;
  1979. }
  1980. ret = ocfs2_insert_extent(osb, handle, dir, et, cpos, phys_blkno, 1, 0,
  1981. meta_ac);
  1982. if (ret)
  1983. mlog_errno(ret);
  1984. out:
  1985. return ret;
  1986. }
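/*
 * Allocate an array of buffer_head pointers large enough to cover one
 * cluster's worth of dx leaf blocks.
 */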
  1987. static struct buffer_head **ocfs2_dx_dir_kmalloc_leaves(struct super_block *sb,
  1988. int *ret_num_leaves)
  1989. {
  1990. int num_dx_leaves = ocfs2_clusters_to_blocks(sb, 1);
  1991. struct buffer_head **dx_leaves;
  1992. dx_leaves = kcalloc(num_dx_leaves, sizeof(struct buffer_head *),
  1993. GFP_NOFS);
  1994. if (dx_leaves && ret_num_leaves)
  1995. *ret_num_leaves = num_dx_leaves;
  1996. return dx_leaves;
  1997. }
  1998. static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
  1999. handle_t *handle,
  2000. struct inode *parent,
  2001. struct inode *inode,
  2002. struct buffer_head *di_bh,
  2003. struct ocfs2_alloc_context *data_ac,
  2004. struct ocfs2_alloc_context *meta_ac)
  2005. {
  2006. int ret;
  2007. struct buffer_head *leaf_bh = NULL;
  2008. struct buffer_head *dx_root_bh = NULL;
  2009. struct ocfs2_dx_hinfo hinfo;
  2010. struct ocfs2_dx_root_block *dx_root;
  2011. struct ocfs2_dx_entry_list *entry_list;
  2012. /*
  2013. * Our strategy is to create the directory as though it were
  2014. * unindexed, then add the index block. This works with very
  2015. * little complication since the state of a new directory is a
  2016. * very well known quantity.
  2017. *
  2018. * Essentially, we have two dirents ("." and ".."), in the 1st
  2019. * block which need indexing. These are easily inserted into
  2020. * the index block.
  2021. */
  2022. ret = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh,
  2023. data_ac, &leaf_bh);
  2024. if (ret) {
  2025. mlog_errno(ret);
  2026. goto out;
  2027. }
  2028. ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh,
  2029. meta_ac, 1, &dx_root_bh);
  2030. if (ret) {
  2031. mlog_errno(ret);
  2032. goto out;
  2033. }
  2034. dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
  2035. entry_list = &dx_root->dr_entries;
        /* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */
        ocfs2_dx_dir_name_hash(inode, ".", 1, &hinfo);
        ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);

        ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo);
        ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);
  2040. out:
  2041. brelse(dx_root_bh);
  2042. brelse(leaf_bh);
  2043. return ret;
  2044. }
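/*
 * Fill a newly created directory with its initial entries, choosing
 * the inline-data, indexed, or plain extent-list format as appropriate
 * for this filesystem and inode.
 */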
  2045. int ocfs2_fill_new_dir(struct ocfs2_super *osb,
  2046. handle_t *handle,
  2047. struct inode *parent,
  2048. struct inode *inode,
  2049. struct buffer_head *fe_bh,
  2050. struct ocfs2_alloc_context *data_ac,
  2051. struct ocfs2_alloc_context *meta_ac)
  2052. {
  2053. BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL);
  2054. if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
  2055. return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh);
  2056. if (ocfs2_supports_indexed_dirs(osb))
  2057. return ocfs2_fill_new_dir_dx(osb, handle, parent, inode, fe_bh,
  2058. data_ac, meta_ac);
  2059. return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh,
  2060. data_ac, NULL);
  2061. }
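/*
 * Walk every in-use dirent in an unindexed directory block and insert
 * its name hash into the appropriate dx leaf from dx_leaves.
 */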
  2062. static int ocfs2_dx_dir_index_block(struct inode *dir,
  2063. handle_t *handle,
  2064. struct buffer_head **dx_leaves,
  2065. int num_dx_leaves,
  2066. struct buffer_head *dirent_bh)
  2067. {
        int ret = 0, namelen, i;
  2069. char *de_buf, *limit;
  2070. struct ocfs2_dir_entry *de;
  2071. struct buffer_head *dx_leaf_bh;
  2072. struct ocfs2_dx_hinfo hinfo;
  2073. u64 dirent_blk = dirent_bh->b_blocknr;
  2074. de_buf = dirent_bh->b_data;
  2075. limit = de_buf + dir->i_sb->s_blocksize;
  2076. while (de_buf < limit) {
  2077. de = (struct ocfs2_dir_entry *)de_buf;
  2078. namelen = de->name_len;
  2079. if (!namelen || !de->inode)
  2080. goto inc;
  2081. ocfs2_dx_dir_name_hash(dir, de->name, namelen, &hinfo);
  2082. i = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), &hinfo);
  2083. dx_leaf_bh = dx_leaves[i];
  2084. ret = __ocfs2_dx_dir_leaf_insert(dir, handle, &hinfo,
  2085. dirent_blk, dx_leaf_bh);
  2086. if (ret) {
  2087. mlog_errno(ret);
  2088. goto out;
  2089. }
  2090. inc:
  2091. de_buf += le16_to_cpu(de->rec_len);
  2092. }
  2093. out:
  2094. return ret;
  2095. }
  2096. /*
  2097. * XXX: This expects dx_root_bh to already be part of the transaction.
  2098. */
  2099. static void ocfs2_dx_dir_index_root_block(struct inode *dir,
  2100. struct buffer_head *dx_root_bh,
  2101. struct buffer_head *dirent_bh)
  2102. {
  2103. char *de_buf, *limit;
  2104. struct ocfs2_dx_root_block *dx_root;
  2105. struct ocfs2_dir_entry *de;
  2106. struct ocfs2_dx_hinfo hinfo;
  2107. u64 dirent_blk = dirent_bh->b_blocknr;
  2108. dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
  2109. de_buf = dirent_bh->b_data;
  2110. limit = de_buf + dir->i_sb->s_blocksize;
  2111. while (de_buf < limit) {
  2112. de = (struct ocfs2_dir_entry *)de_buf;
  2113. if (!de->name_len || !de->inode)
  2114. goto inc;
  2115. ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo);
  2116. mlog(0,
  2117. "dir: %llu, major: 0x%x minor: 0x%x, index: %u, name: %.*s\n",
  2118. (unsigned long long)dir->i_ino, hinfo.major_hash,
  2119. hinfo.minor_hash,
  2120. le16_to_cpu(dx_root->dr_entries.de_num_used),
  2121. de->name_len, de->name);
  2122. ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo,
  2123. dirent_blk);
  2124. inc:
  2125. de_buf += le16_to_cpu(de->rec_len);
  2126. }
  2127. }
  2128. /*
  2129. * Count the number of inline directory entries in di_bh and compare
  2130. * them against the number of entries we can hold in an inline dx root
  2131. * block.
  2132. */
  2133. static int ocfs2_new_dx_should_be_inline(struct inode *dir,
  2134. struct buffer_head *di_bh)
  2135. {
  2136. int dirent_count = 0;
  2137. char *de_buf, *limit;
  2138. struct ocfs2_dir_entry *de;
  2139. struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
  2140. de_buf = di->id2.i_data.id_data;
  2141. limit = de_buf + i_size_read(dir);
  2142. while (de_buf < limit) {
  2143. de = (struct ocfs2_dir_entry *)de_buf;
  2144. if (de->name_len && de->inode)
  2145. dirent_count++;
  2146. de_buf += le16_to_cpu(de->rec_len);
  2147. }
  2148. /* We are careful to leave room for one extra record. */
  2149. return dirent_count < ocfs2_dx_entries_per_root(dir->i_sb);
  2150. }
  2151. /*
  2152. * Expand rec_len of the rightmost dirent in a directory block so that it
  2153. * contains the end of our valid space for dirents. We do this during
  2154. * expansion from an inline directory to one with extents. The first dir block
  2155. * in that case is taken from the inline data portion of the inode block.
  2156. *
  2157. * We add the dir trailer if this filesystem wants it.
  2158. */
  2159. static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
  2160. struct super_block *sb)
  2161. {
  2162. struct ocfs2_dir_entry *de;
  2163. struct ocfs2_dir_entry *prev_de;
  2164. char *de_buf, *limit;
  2165. unsigned int new_size = sb->s_blocksize;
  2166. unsigned int bytes;
  2167. if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
  2168. new_size = ocfs2_dir_trailer_blk_off(sb);
  2169. bytes = new_size - old_size;
  2170. limit = start + old_size;
  2171. de_buf = start;
  2172. de = (struct ocfs2_dir_entry *)de_buf;
  2173. do {
  2174. prev_de = de;
  2175. de_buf += le16_to_cpu(de->rec_len);
  2176. de = (struct ocfs2_dir_entry *)de_buf;
  2177. } while (de_buf < limit);
  2178. le16_add_cpu(&prev_de->rec_len, bytes);
  2179. }
  2180. /*
  2181. * We allocate enough clusters to fulfill "blocks_wanted", but set
  2182. * i_size to exactly one block. Ocfs2_extend_dir() will handle the
  2183. * rest automatically for us.
  2184. *
  2185. * *first_block_bh is a pointer to the 1st data block allocated to the
  2186. * directory.
  2187. */
  2188. static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
  2189. unsigned int blocks_wanted,
  2190. struct ocfs2_dir_lookup_result *lookup,
  2191. struct buffer_head **first_block_bh)
  2192. {
  2193. u32 alloc, dx_alloc, bit_off, len;
  2194. struct super_block *sb = dir->i_sb;
  2195. int ret, i, num_dx_leaves = 0, dx_inline = 0,
  2196. credits = ocfs2_inline_to_extents_credits(sb);
  2197. u64 dx_insert_blkno, blkno,
  2198. bytes = blocks_wanted << sb->s_blocksize_bits;
  2199. struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
  2200. struct ocfs2_inode_info *oi = OCFS2_I(dir);
        struct ocfs2_alloc_context *data_ac = NULL;
  2202. struct ocfs2_alloc_context *meta_ac = NULL;
  2203. struct buffer_head *dirdata_bh = NULL;
  2204. struct buffer_head *dx_root_bh = NULL;
  2205. struct buffer_head **dx_leaves = NULL;
  2206. struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
  2207. handle_t *handle;
  2208. struct ocfs2_extent_tree et;
  2209. struct ocfs2_extent_tree dx_et;
  2210. int did_quota = 0, bytes_allocated = 0;
  2211. ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
  2212. alloc = ocfs2_clusters_for_bytes(sb, bytes);
  2213. dx_alloc = 0;
  2214. if (ocfs2_supports_indexed_dirs(osb)) {
  2215. credits += ocfs2_add_dir_index_credits(sb);
  2216. dx_inline = ocfs2_new_dx_should_be_inline(dir, di_bh);
  2217. if (!dx_inline) {
  2218. /* Add one more cluster for an index leaf */
  2219. dx_alloc++;
  2220. dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb,
  2221. &num_dx_leaves);
  2222. if (!dx_leaves) {
  2223. ret = -ENOMEM;
  2224. mlog_errno(ret);
  2225. goto out;
  2226. }
  2227. }
  2228. /* This gets us the dx_root */
  2229. ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
  2230. if (ret) {
  2231. mlog_errno(ret);
  2232. goto out;
  2233. }
  2234. }
  2235. /*
  2236. * We should never need more than 2 clusters for the unindexed
  2237. * tree - maximum dirent size is far less than one block. In
  2238. * fact, the only time we'd need more than one cluster is if
  2239. * blocksize == clustersize and the dirent won't fit in the
  2240. * extra space that the expansion to a single block gives. As
  2241. * of today, that only happens on 4k/4k file systems.
  2242. */
  2243. BUG_ON(alloc > 2);
  2244. ret = ocfs2_reserve_clusters(osb, alloc, &data_ac);
  2245. if (ret) {
  2246. mlog_errno(ret);
  2247. goto out;
  2248. }
  2249. down_write(&oi->ip_alloc_sem);
  2250. /*
  2251. * Prepare for worst case allocation scenario of two separate
  2252. * extents in the unindexed tree.
  2253. */
  2254. if (alloc == 2)
  2255. credits += OCFS2_SUBALLOC_ALLOC;
  2256. handle = ocfs2_start_trans(osb, credits);
  2257. if (IS_ERR(handle)) {
  2258. ret = PTR_ERR(handle);
  2259. mlog_errno(ret);
  2260. goto out_sem;
  2261. }
  2262. if (vfs_dq_alloc_space_nodirty(dir,
  2263. ocfs2_clusters_to_bytes(osb->sb,
  2264. alloc + dx_alloc))) {
  2265. ret = -EDQUOT;
  2266. goto out_commit;
  2267. }
  2268. did_quota = 1;
  2269. if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
  2270. /*
  2271. * Allocate our index cluster first, to maximize the
  2272. * possibility that unindexed leaves grow
  2273. * contiguously.
  2274. */
  2275. ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac,
  2276. dx_leaves, num_dx_leaves,
  2277. &dx_insert_blkno);
  2278. if (ret) {
  2279. mlog_errno(ret);
  2280. goto out_commit;
  2281. }
  2282. bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
  2283. }
  2284. /*
  2285. * Try to claim as many clusters as the bitmap can give though
  2286. * if we only get one now, that's enough to continue. The rest
  2287. * will be claimed after the conversion to extents.
  2288. */
  2289. ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
  2290. if (ret) {
  2291. mlog_errno(ret);
  2292. goto out_commit;
  2293. }
  2294. bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
  2295. /*
  2296. * Operations are carefully ordered so that we set up the new
  2297. * data block first. The conversion from inline data to
  2298. * extents follows.
  2299. */
  2300. blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
  2301. dirdata_bh = sb_getblk(sb, blkno);
  2302. if (!dirdata_bh) {
  2303. ret = -EIO;
  2304. mlog_errno(ret);
  2305. goto out_commit;
  2306. }
  2307. ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
  2308. ret = ocfs2_journal_access_db(handle, dir, dirdata_bh,
  2309. OCFS2_JOURNAL_ACCESS_CREATE);
  2310. if (ret) {
  2311. mlog_errno(ret);
  2312. goto out_commit;
  2313. }
  2314. memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
  2315. memset(dirdata_bh->b_data + i_size_read(dir), 0,
  2316. sb->s_blocksize - i_size_read(dir));
  2317. ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb);
  2318. if (ocfs2_supports_dir_trailer(osb))
  2319. ocfs2_init_dir_trailer(dir, dirdata_bh);
  2320. ret = ocfs2_journal_dirty(handle, dirdata_bh);
  2321. if (ret) {
  2322. mlog_errno(ret);
  2323. goto out_commit;
  2324. }
  2325. if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
  2326. /*
  2327. * Dx dirs with an external cluster need to do this up
  2328. * front. Inline dx root's get handled later, after
  2329. * we've allocated our root block.
  2330. */
  2331. ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves,
  2332. num_dx_leaves, dirdata_bh);
  2333. if (ret) {
  2334. mlog_errno(ret);
  2335. goto out_commit;
  2336. }
  2337. }
  2338. /*
  2339. * Set extent, i_size, etc on the directory. After this, the
  2340. * inode should contain the same exact dirents as before and
  2341. * be fully accessible from system calls.
  2342. *
  2343. * We let the later dirent insert modify c/mtime - to the user
  2344. * the data hasn't changed.
  2345. */
  2346. ret = ocfs2_journal_access_di(handle, dir, di_bh,
  2347. OCFS2_JOURNAL_ACCESS_CREATE);
  2348. if (ret) {
  2349. mlog_errno(ret);
  2350. goto out_commit;
  2351. }
  2352. spin_lock(&oi->ip_lock);
  2353. oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
  2354. di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
  2355. spin_unlock(&oi->ip_lock);
  2356. ocfs2_dinode_new_extent_list(dir, di);
  2357. i_size_write(dir, sb->s_blocksize);
  2358. dir->i_mtime = dir->i_ctime = CURRENT_TIME;
  2359. di->i_size = cpu_to_le64(sb->s_blocksize);
  2360. di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
  2361. di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
  2362. /*
  2363. * This should never fail as our extent list is empty and all
  2364. * related blocks have been journaled already.
  2365. */
  2366. ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len,
  2367. 0, NULL);
  2368. if (ret) {
  2369. mlog_errno(ret);
  2370. goto out_commit;
  2371. }
  2372. /*
  2373. * Set i_blocks after the extent insert for the most up to
  2374. * date ip_clusters value.
  2375. */
  2376. dir->i_blocks = ocfs2_inode_sector_count(dir);
  2377. ret = ocfs2_journal_dirty(handle, di_bh);
  2378. if (ret) {
  2379. mlog_errno(ret);
  2380. goto out_commit;
  2381. }
  2382. if (ocfs2_supports_indexed_dirs(osb)) {
  2383. ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
  2384. meta_ac, dx_inline,
  2385. &dx_root_bh);
  2386. if (ret) {
  2387. mlog_errno(ret);
  2388. goto out_commit;
  2389. }
  2390. if (dx_inline) {
  2391. ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
  2392. dirdata_bh);
  2393. } else {
  2394. ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh);
  2395. ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0,
  2396. dx_insert_blkno, 1, 0, NULL);
  2397. if (ret)
  2398. mlog_errno(ret);
  2399. }
  2400. }
  2401. /*
  2402. * We asked for two clusters, but only got one in the 1st
  2403. * pass. Claim the 2nd cluster as a separate extent.
  2404. */
  2405. if (alloc > len) {
  2406. ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
  2407. &len);
  2408. if (ret) {
  2409. mlog_errno(ret);
  2410. goto out_commit;
  2411. }
  2412. blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
  2413. ret = ocfs2_insert_extent(osb, handle, dir, &et, 1,
  2414. blkno, len, 0, NULL);
  2415. if (ret) {
  2416. mlog_errno(ret);
  2417. goto out_commit;
  2418. }
  2419. bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
  2420. }
  2421. *first_block_bh = dirdata_bh;
  2422. dirdata_bh = NULL;
  2423. if (ocfs2_supports_indexed_dirs(osb)) {
  2424. unsigned int off;
  2425. if (!dx_inline) {
  2426. /*
  2427. * We need to return the correct block within the
  2428. * cluster which should hold our entry.
  2429. */
  2430. off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb),
  2431. &lookup->dl_hinfo);
  2432. get_bh(dx_leaves[off]);
  2433. lookup->dl_dx_leaf_bh = dx_leaves[off];
  2434. }
  2435. lookup->dl_dx_root_bh = dx_root_bh;
  2436. dx_root_bh = NULL;
  2437. }
  2438. out_commit:
  2439. if (ret < 0 && did_quota)
  2440. vfs_dq_free_space_nodirty(dir, bytes_allocated);
  2441. ocfs2_commit_trans(osb, handle);
  2442. out_sem:
  2443. up_write(&oi->ip_alloc_sem);
  2444. out:
  2445. if (data_ac)
  2446. ocfs2_free_alloc_context(data_ac);
  2447. if (meta_ac)
  2448. ocfs2_free_alloc_context(meta_ac);
  2449. if (dx_leaves) {
  2450. for (i = 0; i < num_dx_leaves; i++)
  2451. brelse(dx_leaves[i]);
  2452. kfree(dx_leaves);
  2453. }
  2454. brelse(dirdata_bh);
  2455. brelse(dx_root_bh);
  2456. return ret;
  2457. }
  2458. /* returns a bh of the 1st new block in the allocation. */
  2459. static int ocfs2_do_extend_dir(struct super_block *sb,
  2460. handle_t *handle,
  2461. struct inode *dir,
  2462. struct buffer_head *parent_fe_bh,
  2463. struct ocfs2_alloc_context *data_ac,
  2464. struct ocfs2_alloc_context *meta_ac,
  2465. struct buffer_head **new_bh)
  2466. {
  2467. int status;
  2468. int extend, did_quota = 0;
  2469. u64 p_blkno, v_blkno;
  2470. spin_lock(&OCFS2_I(dir)->ip_lock);
  2471. extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters));
  2472. spin_unlock(&OCFS2_I(dir)->ip_lock);
  2473. if (extend) {
  2474. u32 offset = OCFS2_I(dir)->ip_clusters;
  2475. if (vfs_dq_alloc_space_nodirty(dir,
  2476. ocfs2_clusters_to_bytes(sb, 1))) {
  2477. status = -EDQUOT;
  2478. goto bail;
  2479. }
  2480. did_quota = 1;
  2481. status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
  2482. 1, 0, parent_fe_bh, handle,
  2483. data_ac, meta_ac, NULL);
  2484. BUG_ON(status == -EAGAIN);
  2485. if (status < 0) {
  2486. mlog_errno(status);
  2487. goto bail;
  2488. }
  2489. }
  2490. v_blkno = ocfs2_blocks_for_bytes(sb, i_size_read(dir));
  2491. status = ocfs2_extent_map_get_blocks(dir, v_blkno, &p_blkno, NULL, NULL);
  2492. if (status < 0) {
  2493. mlog_errno(status);
  2494. goto bail;
  2495. }
  2496. *new_bh = sb_getblk(sb, p_blkno);
  2497. if (!*new_bh) {
  2498. status = -EIO;
  2499. mlog_errno(status);
  2500. goto bail;
  2501. }
  2502. status = 0;
  2503. bail:
  2504. if (did_quota && status < 0)
  2505. vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
  2506. mlog_exit(status);
  2507. return status;
  2508. }
  2509. /*
  2510. * Assumes you already have a cluster lock on the directory.
  2511. *
  2512. * 'blocks_wanted' is only used if we have an inline directory which
  2513. * is to be turned into an extent based one. The size of the dirent to
  2514. * insert might be larger than the space gained by growing to just one
  2515. * block, so we may have to grow the inode by two blocks in that case.
  2516. */
  2517. static int ocfs2_extend_dir(struct ocfs2_super *osb,
  2518. struct inode *dir,
  2519. struct buffer_head *parent_fe_bh,
  2520. unsigned int blocks_wanted,
  2521. struct ocfs2_dir_lookup_result *lookup,
  2522. struct buffer_head **new_de_bh)
  2523. {
  2524. int status = 0;
  2525. int credits, num_free_extents, drop_alloc_sem = 0;
  2526. loff_t dir_i_size;
  2527. struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
  2528. struct ocfs2_extent_list *el = &fe->id2.i_list;
  2529. struct ocfs2_alloc_context *data_ac = NULL;
  2530. struct ocfs2_alloc_context *meta_ac = NULL;
  2531. handle_t *handle = NULL;
  2532. struct buffer_head *new_bh = NULL;
  2533. struct ocfs2_dir_entry * de;
  2534. struct super_block *sb = osb->sb;
  2535. struct ocfs2_extent_tree et;
  2536. mlog_entry_void();
  2537. if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
  2538. status = ocfs2_expand_inline_dir(dir, parent_fe_bh,
  2539. blocks_wanted, lookup,
  2540. &new_bh);
  2541. if (status) {
  2542. mlog_errno(status);
  2543. goto bail;
  2544. }
  2545. if (blocks_wanted == 1) {
  2546. /*
  2547. * If the new dirent will fit inside the space
  2548. * created by pushing out to one block, then
  2549. * we can complete the operation
  2550. * here. Otherwise we have to expand i_size
  2551. * and format the 2nd block below.
  2552. */
  2553. BUG_ON(new_bh == NULL);
  2554. goto bail_bh;
  2555. }
  2556. /*
  2557. * Get rid of 'new_bh' - we want to format the 2nd
  2558. * data block and return that instead.
  2559. */
  2560. brelse(new_bh);
  2561. new_bh = NULL;
  2562. dir_i_size = i_size_read(dir);
  2563. credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
  2564. goto do_extend;
  2565. }
  2566. dir_i_size = i_size_read(dir);
  2567. mlog(0, "extending dir %llu (i_size = %lld)\n",
  2568. (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
  2569. /* dir->i_size is always block aligned. */
  2570. spin_lock(&OCFS2_I(dir)->ip_lock);
  2571. if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
  2572. spin_unlock(&OCFS2_I(dir)->ip_lock);
  2573. ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh);
  2574. num_free_extents = ocfs2_num_free_extents(osb, dir, &et);
  2575. if (num_free_extents < 0) {
  2576. status = num_free_extents;
  2577. mlog_errno(status);
  2578. goto bail;
  2579. }
  2580. if (!num_free_extents) {
  2581. status = ocfs2_reserve_new_metadata(osb, el, &meta_ac);
  2582. if (status < 0) {
  2583. if (status != -ENOSPC)
  2584. mlog_errno(status);
  2585. goto bail;
  2586. }
  2587. }
  2588. status = ocfs2_reserve_clusters(osb, 1, &data_ac);
  2589. if (status < 0) {
  2590. if (status != -ENOSPC)
  2591. mlog_errno(status);
  2592. goto bail;
  2593. }
  2594. credits = ocfs2_calc_extend_credits(sb, el, 1);
  2595. } else {
  2596. spin_unlock(&OCFS2_I(dir)->ip_lock);
  2597. credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
  2598. }
  2599. do_extend:
  2600. down_write(&OCFS2_I(dir)->ip_alloc_sem);
  2601. drop_alloc_sem = 1;
  2602. handle = ocfs2_start_trans(osb, credits);
  2603. if (IS_ERR(handle)) {
  2604. status = PTR_ERR(handle);
  2605. handle = NULL;
  2606. mlog_errno(status);
  2607. goto bail;
  2608. }
  2609. status = ocfs2_do_extend_dir(osb->sb, handle, dir, parent_fe_bh,
  2610. data_ac, meta_ac, &new_bh);
  2611. if (status < 0) {
  2612. mlog_errno(status);
  2613. goto bail;
  2614. }
  2615. ocfs2_set_new_buffer_uptodate(dir, new_bh);
  2616. status = ocfs2_journal_access_db(handle, dir, new_bh,
  2617. OCFS2_JOURNAL_ACCESS_CREATE);
  2618. if (status < 0) {
  2619. mlog_errno(status);
  2620. goto bail;
  2621. }
  2622. memset(new_bh->b_data, 0, sb->s_blocksize);
  2623. de = (struct ocfs2_dir_entry *) new_bh->b_data;
  2624. de->inode = 0;
  2625. if (ocfs2_dir_has_trailer(dir)) {
  2626. de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
  2627. ocfs2_init_dir_trailer(dir, new_bh);
  2628. } else {
  2629. de->rec_len = cpu_to_le16(sb->s_blocksize);
  2630. }
  2631. status = ocfs2_journal_dirty(handle, new_bh);
  2632. if (status < 0) {
  2633. mlog_errno(status);
  2634. goto bail;
  2635. }
  2636. dir_i_size += dir->i_sb->s_blocksize;
  2637. i_size_write(dir, dir_i_size);
  2638. dir->i_blocks = ocfs2_inode_sector_count(dir);
  2639. status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
  2640. if (status < 0) {
  2641. mlog_errno(status);
  2642. goto bail;
  2643. }
  2644. bail_bh:
  2645. *new_de_bh = new_bh;
  2646. get_bh(*new_de_bh);
  2647. bail:
  2648. if (drop_alloc_sem)
  2649. up_write(&OCFS2_I(dir)->ip_alloc_sem);
  2650. if (handle)
  2651. ocfs2_commit_trans(osb, handle);
  2652. if (data_ac)
  2653. ocfs2_free_alloc_context(data_ac);
  2654. if (meta_ac)
  2655. ocfs2_free_alloc_context(meta_ac);
  2656. brelse(new_bh);
  2657. mlog_exit(status);
  2658. return status;
  2659. }
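/*
 * Look for room to insert a dirent of 'namelen' inside an inline-data
 * directory. On -ENOSPC, *blocks_wanted is set to the number of blocks
 * the directory must grow to before the insert can succeed.
 */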
  2660. static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
  2661. const char *name, int namelen,
  2662. struct buffer_head **ret_de_bh,
  2663. unsigned int *blocks_wanted)
  2664. {
  2665. int ret;
  2666. struct super_block *sb = dir->i_sb;
  2667. struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
  2668. struct ocfs2_dir_entry *de, *last_de = NULL;
  2669. char *de_buf, *limit;
  2670. unsigned long offset = 0;
  2671. unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize;
  2672. /*
  2673. * This calculates how many free bytes we'd have in block zero, should
  2674. * this function force expansion to an extent tree.
  2675. */
  2676. if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
  2677. free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir);
  2678. else
  2679. free_space = dir->i_sb->s_blocksize - i_size_read(dir);
  2680. de_buf = di->id2.i_data.id_data;
  2681. limit = de_buf + i_size_read(dir);
  2682. rec_len = OCFS2_DIR_REC_LEN(namelen);
  2683. while (de_buf < limit) {
  2684. de = (struct ocfs2_dir_entry *)de_buf;
  2685. if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
  2686. ret = -ENOENT;
  2687. goto out;
  2688. }
  2689. if (ocfs2_match(namelen, name, de)) {
  2690. ret = -EEXIST;
  2691. goto out;
  2692. }
  2693. /*
  2694. * No need to check for a trailing dirent record here as
  2695. * they're not used for inline dirs.
  2696. */
  2697. if (ocfs2_dirent_would_fit(de, rec_len)) {
  2698. /* Ok, we found a spot. Return this bh and let
  2699. * the caller actually fill it in. */
  2700. *ret_de_bh = di_bh;
  2701. get_bh(*ret_de_bh);
  2702. ret = 0;
  2703. goto out;
  2704. }
  2705. last_de = de;
  2706. de_buf += le16_to_cpu(de->rec_len);
  2707. offset += le16_to_cpu(de->rec_len);
  2708. }
  2709. /*
  2710. * We're going to require expansion of the directory - figure
  2711. * out how many blocks we'll need so that a place for the
  2712. * dirent can be found.
  2713. */
  2714. *blocks_wanted = 1;
  2715. new_rec_len = le16_to_cpu(last_de->rec_len) + free_space;
  2716. if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
  2717. *blocks_wanted = 2;
  2718. ret = -ENOSPC;
  2719. out:
  2720. return ret;
  2721. }
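/*
 * Search the directory's data blocks for a slot that can hold a dirent
 * of 'namelen'. Returns -ENOSPC if the caller needs to extend the
 * directory first.
 */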
  2722. static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
  2723. int namelen, struct buffer_head **ret_de_bh)
  2724. {
  2725. unsigned long offset;
  2726. struct buffer_head *bh = NULL;
  2727. unsigned short rec_len;
  2728. struct ocfs2_dir_entry *de;
  2729. struct super_block *sb = dir->i_sb;
  2730. int status;
  2731. int blocksize = dir->i_sb->s_blocksize;
  2732. status = ocfs2_read_dir_block(dir, 0, &bh, 0);
  2733. if (status) {
  2734. mlog_errno(status);
  2735. goto bail;
  2736. }
  2737. rec_len = OCFS2_DIR_REC_LEN(namelen);
  2738. offset = 0;
  2739. de = (struct ocfs2_dir_entry *) bh->b_data;
  2740. while (1) {
  2741. if ((char *)de >= sb->s_blocksize + bh->b_data) {
  2742. brelse(bh);
  2743. bh = NULL;
  2744. if (i_size_read(dir) <= offset) {
  2745. /*
  2746. * Caller will have to expand this
  2747. * directory.
  2748. */
  2749. status = -ENOSPC;
  2750. goto bail;
  2751. }
  2752. status = ocfs2_read_dir_block(dir,
  2753. offset >> sb->s_blocksize_bits,
  2754. &bh, 0);
  2755. if (status) {
  2756. mlog_errno(status);
  2757. goto bail;
  2758. }
  2759. /* move to next block */
  2760. de = (struct ocfs2_dir_entry *) bh->b_data;
  2761. }
  2762. if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
  2763. status = -ENOENT;
  2764. goto bail;
  2765. }
  2766. if (ocfs2_match(namelen, name, de)) {
  2767. status = -EEXIST;
  2768. goto bail;
  2769. }
  2770. if (ocfs2_skip_dir_trailer(dir, de, offset % blocksize,
  2771. blocksize))
  2772. goto next;
  2773. if (ocfs2_dirent_would_fit(de, rec_len)) {
  2774. /* Ok, we found a spot. Return this bh and let
  2775. * the caller actually fill it in. */
  2776. *ret_de_bh = bh;
  2777. get_bh(*ret_de_bh);
  2778. status = 0;
  2779. goto bail;
  2780. }
  2781. next:
  2782. offset += le16_to_cpu(de->rec_len);
  2783. de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
  2784. }
  2785. status = 0;
  2786. bail:
  2787. brelse(bh);
  2788. mlog_exit(status);
  2789. return status;
  2790. }
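/*
 * sort() comparison callback - order dx entries by major hash, then by
 * minor hash, so a leaf can later be split on a major hash boundary.
 */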
  2791. static int dx_leaf_sort_cmp(const void *a, const void *b)
  2792. {
  2793. const struct ocfs2_dx_entry *entry1 = a;
  2794. const struct ocfs2_dx_entry *entry2 = b;
  2795. u32 major_hash1 = le32_to_cpu(entry1->dx_major_hash);
  2796. u32 major_hash2 = le32_to_cpu(entry2->dx_major_hash);
  2797. u32 minor_hash1 = le32_to_cpu(entry1->dx_minor_hash);
  2798. u32 minor_hash2 = le32_to_cpu(entry2->dx_minor_hash);
  2799. if (major_hash1 > major_hash2)
  2800. return 1;
  2801. if (major_hash1 < major_hash2)
  2802. return -1;
        /*
         * It is not strictly necessary to sort by minor hash here.
         */
  2806. if (minor_hash1 > minor_hash2)
  2807. return 1;
  2808. if (minor_hash1 < minor_hash2)
  2809. return -1;
  2810. return 0;
  2811. }
  2812. static void dx_leaf_sort_swap(void *a, void *b, int size)
  2813. {
  2814. struct ocfs2_dx_entry *entry1 = a;
  2815. struct ocfs2_dx_entry *entry2 = b;
  2816. struct ocfs2_dx_entry tmp;
  2817. BUG_ON(size != sizeof(*entry1));
  2818. tmp = *entry1;
  2819. *entry1 = *entry2;
  2820. *entry2 = tmp;
  2821. }
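/*
 * Return 1 if every entry in the (sorted) leaf shares the same major
 * hash, 0 otherwise.
 */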
  2822. static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf)
  2823. {
  2824. struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
  2825. int i, num = le16_to_cpu(dl_list->de_num_used);
  2826. for (i = 0; i < (num - 1); i++) {
  2827. if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) !=
  2828. le32_to_cpu(dl_list->de_entries[i + 1].dx_major_hash))
  2829. return 0;
  2830. }
  2831. return 1;
  2832. }
/*
 * Find the optimal value to split this leaf on. This expects the leaf
 * entries to be in sorted order.
 *
 * leaf_cpos is the cpos of the leaf we're splitting. insert_hash is
 * the hash we want to insert.
 *
 * This function is only concerned with the major hash - that which
 * determines which cluster an item belongs to.
 */
static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf,
					u32 leaf_cpos, u32 insert_hash,
					u32 *split_hash)
{
	struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
	int i, num_used = le16_to_cpu(dl_list->de_num_used);
	int allsame;

	/*
	 * There's a couple of rare, but nasty corner cases we have to
	 * check for here. All of them involve a leaf where all values
	 * have the same hash, which is what we look for first.
	 *
	 * Most of the time, all of the above is false, and we simply
	 * pick the median value for a split.
	 */
	allsame = ocfs2_dx_leaf_same_major(dx_leaf);
	if (allsame) {
		u32 val = le32_to_cpu(dl_list->de_entries[0].dx_major_hash);

		if (val == insert_hash) {
			/*
			 * No matter where we would choose to split,
			 * the new entry would want to occupy the same
			 * block as these. Since there's no space left
			 * in their existing block, we know there
			 * won't be space after the split.
			 */
			return -ENOSPC;
		}

		if (val == leaf_cpos) {
			/*
			 * Because val is the same as leaf_cpos (which
			 * is the smallest value this leaf can have),
			 * yet is not equal to insert_hash, then we
			 * know that insert_hash *must* be larger than
			 * val (and leaf_cpos). At least cpos+1 in value.
			 *
			 * We also know then, that there cannot be an
			 * adjacent extent (otherwise we'd be looking
			 * at it). Choosing this value gives us a
			 * chance to get some contiguousness.
			 */
			*split_hash = leaf_cpos + 1;
			return 0;
		}

		if (val > insert_hash) {
			/*
			 * val can not be the same as insert hash, and
			 * also must be larger than leaf_cpos. Also,
			 * we know that there can't be a leaf between
			 * cpos and val, otherwise the entries with
			 * hash 'val' would be there.
			 */
			*split_hash = val;
			return 0;
		}

		*split_hash = insert_hash;
		return 0;
	}

	/*
	 * Since the records are sorted and the checks above
	 * guaranteed that not all records in this block are the same,
	 * we simply travel forward, from the median, and pick the 1st
	 * record whose value is larger than leaf_cpos.
	 */
	for (i = (num_used / 2); i < num_used; i++)
		if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) >
		    leaf_cpos)
			break;

	BUG_ON(i == num_used); /* Should be impossible */
	*split_hash = le32_to_cpu(dl_list->de_entries[i].dx_major_hash);
	return 0;
}

/*
 * Transfer all entries in orig_dx_leaves whose major hash is equal to or
 * larger than split_hash into new_dx_leaves. We use a temporary
 * buffer (tmp_dx_leaf) to make the changes to the original leaf blocks.
 *
 * Since the block offset inside a leaf (cluster) is a constant mask
 * of minor_hash, we can optimize - an item at block offset X within
 * the original cluster, will be at offset X within the new cluster.
 */
static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
				       handle_t *handle,
				       struct ocfs2_dx_leaf *tmp_dx_leaf,
				       struct buffer_head **orig_dx_leaves,
				       struct buffer_head **new_dx_leaves,
				       int num_dx_leaves)
{
	int i, j, num_used;
	u32 major_hash;
	struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf;
	struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list;
	struct ocfs2_dx_entry *dx_entry;

	tmp_list = &tmp_dx_leaf->dl_list;

	for (i = 0; i < num_dx_leaves; i++) {
		orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data;
		orig_list = &orig_dx_leaf->dl_list;
		new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data;
		new_list = &new_dx_leaf->dl_list;

		num_used = le16_to_cpu(orig_list->de_num_used);

		memcpy(tmp_dx_leaf, orig_dx_leaf, dir->i_sb->s_blocksize);
		tmp_list->de_num_used = cpu_to_le16(0);
		memset(&tmp_list->de_entries, 0, sizeof(*dx_entry)*num_used);

		for (j = 0; j < num_used; j++) {
			dx_entry = &orig_list->de_entries[j];
			major_hash = le32_to_cpu(dx_entry->dx_major_hash);
			if (major_hash >= split_hash)
				ocfs2_dx_dir_leaf_insert_tail(new_dx_leaf,
							      dx_entry);
			else
				ocfs2_dx_dir_leaf_insert_tail(tmp_dx_leaf,
							      dx_entry);
		}
		memcpy(orig_dx_leaf, tmp_dx_leaf, dir->i_sb->s_blocksize);

		ocfs2_journal_dirty(handle, orig_dx_leaves[i]);
		ocfs2_journal_dirty(handle, new_dx_leaves[i]);
	}
}

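/*
 * Estimate the journal credits for a leaf rebalance: two clusters
 * worth of leaf blocks, extension of the dx root extent list, and any
 * quota updates.
 */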
static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb,
					  struct ocfs2_dx_root_block *dx_root)
{
	int credits = ocfs2_clusters_to_blocks(osb->sb, 2);

	credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list, 1);
	credits += ocfs2_quota_trans_credits(osb->sb);
	return credits;
}

/*
 * Find the median value in dx_leaf_bh and allocate a new leaf to move
 * half our entries into.
 */
static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
				  struct buffer_head *dx_root_bh,
				  struct buffer_head *dx_leaf_bh,
				  struct ocfs2_dx_hinfo *hinfo, u32 leaf_cpos,
				  u64 leaf_blkno)
{
	struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
	int credits, ret, i, num_used, did_quota = 0;
	u32 cpos, split_hash, insert_hash = hinfo->major_hash;
	u64 orig_leaves_start;
	int num_dx_leaves;
	struct buffer_head **orig_dx_leaves = NULL;
	struct buffer_head **new_dx_leaves = NULL;
	struct ocfs2_alloc_context *data_ac = NULL, *meta_ac = NULL;
	struct ocfs2_extent_tree et;
	handle_t *handle = NULL;
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dx_leaf *tmp_dx_leaf = NULL;

	mlog(0, "DX Dir: %llu, rebalance leaf leaf_blkno: %llu insert: %u\n",
	     (unsigned long long)OCFS2_I(dir)->ip_blkno,
	     (unsigned long long)leaf_blkno, insert_hash);

	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);

	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
	/*
	 * XXX: This is a rather large limit. We should use a more
	 * realistic value.
	 */
	if (le32_to_cpu(dx_root->dr_clusters) == UINT_MAX)
		return -ENOSPC;

	num_used = le16_to_cpu(dx_leaf->dl_list.de_num_used);
	if (num_used < le16_to_cpu(dx_leaf->dl_list.de_count)) {
		mlog(ML_ERROR, "DX Dir: %llu, Asked to rebalance empty leaf: "
		     "%llu, %d\n", (unsigned long long)OCFS2_I(dir)->ip_blkno,
		     (unsigned long long)leaf_blkno, num_used);
		ret = -EIO;
		goto out;
	}

	orig_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
	if (!orig_dx_leaves) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	new_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, NULL);
	if (!new_dx_leaves) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_lock_allocators(dir, &et, 1, 0, &data_ac, &meta_ac);
	if (ret) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out;
	}

	credits = ocfs2_dx_dir_rebalance_credits(osb, dx_root);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(ret);
		goto out;
	}

	if (vfs_dq_alloc_space_nodirty(dir,
				       ocfs2_clusters_to_bytes(dir->i_sb, 1))) {
		ret = -EDQUOT;
		goto out_commit;
	}
	did_quota = 1;

	ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * This block is changing anyway, so we can sort it in place.
	 */
	sort(dx_leaf->dl_list.de_entries, num_used,
	     sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
	     dx_leaf_sort_swap);

	ret = ocfs2_journal_dirty(handle, dx_leaf_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
					   &split_hash);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	mlog(0, "Split leaf (%u) at %u, insert major hash is %u\n",
	     leaf_cpos, split_hash, insert_hash);

	/*
	 * We have to carefully order operations here. There are items
	 * which want to be in the new cluster before insert, but in
	 * order to put those items in the new cluster, we alter the
	 * old cluster. A failure to insert gets nasty.
	 *
	 * So, start by reserving writes to the old
	 * cluster. ocfs2_dx_dir_new_cluster will reserve writes on
	 * the new cluster for us, before inserting it. The insert
	 * won't happen if there's an error before that. Once the
	 * insert is done then, we can transfer from one leaf into the
	 * other without fear of hitting any error.
	 */

	/*
	 * The leaf transfer wants some scratch space so that we don't
	 * wind up doing a bunch of expensive memmove().
	 */
	tmp_dx_leaf = kmalloc(osb->sb->s_blocksize, GFP_NOFS);
	if (!tmp_dx_leaf) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out_commit;
	}

	/* Round leaf_blkno down to the first block of its cluster, so
	 * we read every leaf block in the original cluster. */
	orig_leaves_start = leaf_blkno &
		~(u64)((1 << (osb->s_clustersize_bits -
			      osb->sb->s_blocksize_bits)) - 1);
	ret = ocfs2_read_dx_leaves(dir, orig_leaves_start, num_dx_leaves,
				   orig_dx_leaves);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	for (i = 0; i < num_dx_leaves; i++) {
		ret = ocfs2_journal_access_dl(handle, dir, orig_dx_leaves[i],
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	cpos = split_hash;
	ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
				       data_ac, meta_ac, new_dx_leaves,
				       num_dx_leaves);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
				   orig_dx_leaves, new_dx_leaves, num_dx_leaves);

out_commit:
	if (ret < 0 && did_quota)
		vfs_dq_free_space_nodirty(dir,
				ocfs2_clusters_to_bytes(dir->i_sb, 1));

	ocfs2_commit_trans(osb, handle);

out:
	if (orig_dx_leaves || new_dx_leaves) {
		for (i = 0; i < num_dx_leaves; i++) {
			if (orig_dx_leaves)
				brelse(orig_dx_leaves[i]);
			if (new_dx_leaves)
				brelse(new_dx_leaves[i]);
		}
		kfree(orig_dx_leaves);
		kfree(new_dx_leaves);
	}

	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);

	kfree(tmp_dx_leaf);
	return ret;
}

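/*
 * Expand an inline dx root: allocate one cluster of dx leaf blocks,
 * move the entries currently stored in the root block out into those
 * leaves, and convert the root to an extent-list based index.
 */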
static int ocfs2_expand_inline_dx_root(struct inode *dir,
				       struct buffer_head *dx_root_bh)
{
	int ret, num_dx_leaves, i, j, did_quota = 0;
	struct buffer_head **dx_leaves = NULL;
	struct ocfs2_extent_tree et;
	u64 insert_blkno;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
	handle_t *handle = NULL;
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dx_entry_list *entry_list;
	struct ocfs2_dx_entry *dx_entry;
	struct ocfs2_dx_leaf *target_leaf;

	ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
	if (!dx_leaves) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	handle = ocfs2_start_trans(osb, ocfs2_calc_dxi_expand_credits(osb->sb));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	if (vfs_dq_alloc_space_nodirty(dir,
				       ocfs2_clusters_to_bytes(osb->sb, 1))) {
		ret = -EDQUOT;
		goto out_commit;
	}
	did_quota = 1;

	/*
	 * We do this up front, before the allocation, so that a
	 * failure to add the dx_root_bh to the journal won't result
	 * in us losing clusters.
	 */
	ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, dx_leaves,
					 num_dx_leaves, &insert_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * Transfer the entries from our dx_root into the appropriate
	 * block
	 */
	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
	entry_list = &dx_root->dr_entries;

	for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
		dx_entry = &entry_list->de_entries[i];

		j = __ocfs2_dx_dir_hash_idx(osb,
					    le32_to_cpu(dx_entry->dx_minor_hash));
		target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]->b_data;

		ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry);

		/* Each leaf has been passed to the journal already
		 * via __ocfs2_dx_dir_new_cluster() */
	}

	dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
	memset(&dx_root->dr_list, 0, osb->sb->s_blocksize -
	       offsetof(struct ocfs2_dx_root_block, dr_list));
	dx_root->dr_list.l_count =
		cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));

	/* This should never fail considering we start with an empty
	 * dx_root. */
	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
	ret = ocfs2_insert_extent(osb, handle, dir, &et, 0,
				  insert_blkno, 1, 0, NULL);
	if (ret)
		mlog_errno(ret);
	did_quota = 0;

	ocfs2_journal_dirty(handle, dx_root_bh);

out_commit:
	if (ret < 0 && did_quota)
		vfs_dq_free_space_nodirty(dir,
					  ocfs2_clusters_to_bytes(dir->i_sb, 1));

	ocfs2_commit_trans(osb, handle);

out:
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);

	if (dx_leaves) {
		for (i = 0; i < num_dx_leaves; i++)
			brelse(dx_leaves[i]);

		kfree(dx_leaves);
	}
	return ret;
}

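/*
 * Does the inline dx root still have room for another entry? Returns
 * 0 if so, -ENOSPC otherwise.
 */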
static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh)
{
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dx_entry_list *entry_list;

	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
	entry_list = &dx_root->dr_entries;

	if (le16_to_cpu(entry_list->de_num_used) >=
	    le16_to_cpu(entry_list->de_count))
		return -ENOSPC;

	return 0;
}

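/*
 * Find space in the directory index for a new entry. An inline root
 * that has filled up is expanded to an extent first. If the target
 * leaf is full, it is rebalanced (split) once and the lookup is
 * retried. On success, the dx root and dx leaf buffers are handed
 * back to the caller via the lookup result.
 */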
static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
				   struct buffer_head *di_bh, const char *name,
				   int namelen,
				   struct ocfs2_dir_lookup_result *lookup)
{
	int ret, rebalanced = 0;
	struct buffer_head *dx_root_bh = NULL;
	struct ocfs2_dx_root_block *dx_root;
	struct buffer_head *dx_leaf_bh = NULL;
	struct ocfs2_dx_leaf *dx_leaf;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	u64 blkno;
	u32 leaf_cpos;

	ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
	if (ocfs2_dx_root_inline(dx_root)) {
		ret = ocfs2_inline_dx_has_space(dx_root_bh);

		if (ret == 0)
			goto search_el;

		/*
		 * We ran out of room in the root block. Expand it to
		 * an extent, then allow ocfs2_find_dir_space_dx to do
		 * the rest.
		 */
		ret = ocfs2_expand_inline_dx_root(dir, dx_root_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

restart_search:
	ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo,
				  &leaf_cpos, &blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;

	if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >=
	    le16_to_cpu(dx_leaf->dl_list.de_count)) {
		if (rebalanced) {
			/*
			 * Rebalancing should have provided us with
			 * space in an appropriate leaf.
			 *
			 * XXX: Is this an abnormal condition then?
			 * Should we print a message here?
			 */
			ret = -ENOSPC;
			goto out;
		}

		ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh,
					     &lookup->dl_hinfo, leaf_cpos,
					     blkno);
		if (ret) {
			if (ret != -ENOSPC)
				mlog_errno(ret);
			goto out;
		}

		/*
		 * Restart the lookup. The rebalance might have
		 * changed which block our item fits into. Mark our
		 * progress, so we only execute this once.
		 */
		brelse(dx_leaf_bh);
		dx_leaf_bh = NULL;
		rebalanced = 1;
		goto restart_search;
	}

search_el:
	lookup->dl_dx_leaf_bh = dx_leaf_bh;
	dx_leaf_bh = NULL;
	lookup->dl_dx_root_bh = dx_root_bh;
	dx_root_bh = NULL;

out:
	brelse(dx_leaf_bh);
	brelse(dx_root_bh);
	return ret;
}

/*
 * Get a directory ready for insert. Any directory allocation required
 * happens here. Success returns zero, and enough context in the dir
 * lookup result that ocfs2_add_entry() will be able to complete the
 * task with minimal performance impact.
 */
int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
				 struct inode *dir,
				 struct buffer_head *parent_fe_bh,
				 const char *name,
				 int namelen,
				 struct ocfs2_dir_lookup_result *lookup)
{
	int ret;
	unsigned int blocks_wanted = 1;
	struct buffer_head *bh = NULL;

	mlog(0, "getting ready to insert namelen %d into dir %llu\n",
	     namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno);

	if (!namelen) {
		ret = -EINVAL;
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Do this up front to reduce confusion.
	 *
	 * The directory might start inline, then be turned into an
	 * indexed one, in which case we'd need to hash deep inside
	 * ocfs2_find_dir_space_id(). Since
	 * ocfs2_prepare_dx_dir_for_insert() also needs this hash
	 * done, there seems no point in spreading out the calls. We
	 * can optimize away the case where the file system doesn't
	 * support indexing.
	 */
	if (ocfs2_supports_indexed_dirs(osb))
		ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo);

	if (ocfs2_dir_indexed(dir)) {
		ret = ocfs2_find_dir_space_dx(osb, dir, parent_fe_bh, name,
					      namelen, lookup);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * We intentionally fall through so that the unindexed
		 * tree can also be prepared.
		 */
	}

	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name,
					      namelen, &bh, &blocks_wanted);
	} else
		ret = ocfs2_find_dir_space_el(dir, name, namelen, &bh);

	if (ret && ret != -ENOSPC) {
		mlog_errno(ret);
		goto out;
	}

	if (ret == -ENOSPC) {
		/*
		 * We have to expand the directory to add this name.
		 */
		BUG_ON(bh);

		ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted,
				       lookup, &bh);
		if (ret) {
			if (ret != -ENOSPC)
				mlog_errno(ret);
			goto out;
		}

		BUG_ON(!bh);
	}

	lookup->dl_leaf_bh = bh;
	bh = NULL;
out:
	brelse(bh);
	return ret;
}

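/*
 * Remove the dx root block of an indexed directory: clear the
 * OCFS2_INDEXED_DIR_FL flag and i_dx_root on the inode, then give the
 * root block back to its suballocator.
 */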
static int ocfs2_dx_dir_remove_index(struct inode *dir,
				     struct buffer_head *di_bh,
				     struct buffer_head *dx_root_bh)
{
	int ret;
	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_dx_root_block *dx_root;
	struct inode *dx_alloc_inode = NULL;
	struct buffer_head *dx_alloc_bh = NULL;
	handle_t *handle;
	u64 blk;
	u16 bit;
	u64 bg_blkno;

	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;

	dx_alloc_inode = ocfs2_get_system_file_inode(osb,
					EXTENT_ALLOC_SYSTEM_INODE,
					le16_to_cpu(dx_root->dr_suballoc_slot));
	if (!dx_alloc_inode) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}
	mutex_lock(&dx_alloc_inode->i_mutex);

	ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1);
	if (ret) {
		mlog_errno(ret);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_DX_ROOT_REMOVE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_journal_access_di(handle, dir, di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
	di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
	di->i_dx_root = cpu_to_le64(0ULL);

	ocfs2_journal_dirty(handle, di_bh);

	blk = le64_to_cpu(dx_root->dr_blkno);
	bit = le16_to_cpu(dx_root->dr_suballoc_bit);
	bg_blkno = ocfs2_which_suballoc_group(blk, bit);
	ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh,
				       bit, bg_blkno, 1);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);

out_unlock:
	ocfs2_inode_unlock(dx_alloc_inode, 1);

out_mutex:
	mutex_unlock(&dx_alloc_inode->i_mutex);
	brelse(dx_alloc_bh);

out:
	iput(dx_alloc_inode);
	return ret;
}

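/*
 * Tear down the index of a directory that is being truncated: free
 * the dx leaf clusters from the highest hashed extent downward, then
 * remove the dx root itself. Unindexed directories return early.
 */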
int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
{
	int ret;
	unsigned int uninitialized_var(clen);
	u32 major_hash = UINT_MAX, p_cpos, uninitialized_var(cpos);
	u64 uninitialized_var(blkno);
	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
	struct buffer_head *dx_root_bh = NULL;
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_cached_dealloc_ctxt dealloc;
	struct ocfs2_extent_tree et;

	ocfs2_init_dealloc_ctxt(&dealloc);

	if (!ocfs2_dir_indexed(dir))
		return 0;

	ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;

	if (ocfs2_dx_root_inline(dx_root))
		goto remove_index;

	ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);

	/* XXX: What if dr_clusters is too large? */
	while (le32_to_cpu(dx_root->dr_clusters)) {
		ret = ocfs2_dx_dir_lookup_rec(dir, &dx_root->dr_list,
					      major_hash, &cpos, &blkno, &clen);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);

		ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen,
					       &dealloc);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (cpos == 0)
			break;

		major_hash = cpos - 1;
	}

remove_index:
	ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_remove_from_cache(dir, dx_root_bh);
out:
	ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &dealloc);

	brelse(dx_root_bh);
	return ret;
}