/* jfs_xtree.c */
  1. /*
  2. * Copyright (C) International Business Machines Corp., 2000-2005
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
  12. * the GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. */
  18. /*
  19. * jfs_xtree.c: extent allocation descriptor B+-tree manager
  20. */
  21. #include <linux/fs.h>
  22. #include <linux/module.h>
  23. #include <linux/quotaops.h>
  24. #include <linux/seq_file.h>
  25. #include "jfs_incore.h"
  26. #include "jfs_filsys.h"
  27. #include "jfs_metapage.h"
  28. #include "jfs_dmap.h"
  29. #include "jfs_dinode.h"
  30. #include "jfs_superblock.h"
  31. #include "jfs_debug.h"
  32. /*
  33. * xtree local flag
  34. */
  35. #define XT_INSERT 0x00000001
  36. /*
  37. * xtree key/entry comparison: extent offset
  38. *
  39. * return:
  40. * -1: k < start of extent
  41. * 0: start_of_extent <= k <= end_of_extent
  42. * 1: k > end_of_extent
  43. */
  44. #define XT_CMP(CMP, K, X, OFFSET64)\
  45. {\
  46. OFFSET64 = offsetXAD(X);\
  47. (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
  48. ((K) < OFFSET64) ? -1 : 0;\
  49. }
  50. /* write a xad entry */
  51. #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
  52. {\
  53. (XAD)->flag = (FLAG);\
  54. XADoffset((XAD), (OFF));\
  55. XADlength((XAD), (LEN));\
  56. XADaddress((XAD), (ADDR));\
  57. }
  58. #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
  59. /* get page buffer for specified block address */
  60. /* ToDo: Replace this ugly macro with a function */
  61. #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
  62. {\
  63. BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
  64. if (!(RC))\
  65. {\
  66. if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
  67. (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
  68. (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
  69. {\
  70. jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
  71. BT_PUTPAGE(MP);\
  72. MP = NULL;\
  73. RC = -EIO;\
  74. }\
  75. }\
  76. }
  77. /* for consistency */
  78. #define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
  79. #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
  80. BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
  81. /* xtree entry parameter descriptor */
  82. struct xtsplit {
  83. struct metapage *mp;
  84. s16 index;
  85. u8 flag;
  86. s64 off;
  87. s64 addr;
  88. int len;
  89. struct pxdlist *pxdlist;
  90. };
  91. /*
  92. * statistics
  93. */
#ifdef CONFIG_JFS_STATISTICS
/* per-module counters, exposed only when JFS statistics are configured */
static struct {
	uint search;	/* total xtSearch() invocations */
	uint fastSearch;	/* searches satisfied by the sequential-access
				 * heuristic (no binary descent) */
	uint split;	/* page splits — incremented outside this chunk;
			 * presumably in xtSplitUp(), verify there */
} xtStat;
#endif
  101. /*
  102. * forward references
  103. */
  104. static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
  105. struct btstack * btstack, int flag);
  106. static int xtSplitUp(tid_t tid,
  107. struct inode *ip,
  108. struct xtsplit * split, struct btstack * btstack);
  109. static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
  110. struct metapage ** rmpp, s64 * rbnp);
  111. static int xtSplitRoot(tid_t tid, struct inode *ip,
  112. struct xtsplit * split, struct metapage ** rmpp);
  113. #ifdef _STILL_TO_PORT
  114. static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
  115. xtpage_t * fp, struct btstack * btstack);
  116. static int xtSearchNode(struct inode *ip,
  117. xad_t * xad,
  118. int *cmpp, struct btstack * btstack, int flag);
  119. static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
  120. #endif /* _STILL_TO_PORT */
  121. /*
  122. * xtLookup()
  123. *
  124. * function: map a single page into a physical extent;
  125. */
  126. int xtLookup(struct inode *ip, s64 lstart,
  127. s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
  128. {
  129. int rc = 0;
  130. struct btstack btstack;
  131. int cmp;
  132. s64 bn;
  133. struct metapage *mp;
  134. xtpage_t *p;
  135. int index;
  136. xad_t *xad;
  137. s64 next, size, xoff, xend;
  138. int xlen;
  139. s64 xaddr;
  140. *paddr = 0;
  141. *plen = llen;
  142. if (!no_check) {
  143. /* is lookup offset beyond eof ? */
  144. size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
  145. JFS_SBI(ip->i_sb)->l2bsize;
  146. if (lstart >= size) {
  147. jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
  148. (ulong) lstart, (ulong) size);
  149. return 0;
  150. }
  151. }
  152. /*
  153. * search for the xad entry covering the logical extent
  154. */
  155. //search:
  156. if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
  157. jfs_err("xtLookup: xtSearch returned %d", rc);
  158. return rc;
  159. }
  160. /*
  161. * compute the physical extent covering logical extent
  162. *
  163. * N.B. search may have failed (e.g., hole in sparse file),
  164. * and returned the index of the next entry.
  165. */
  166. /* retrieve search result */
  167. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  168. /* is xad found covering start of logical extent ?
  169. * lstart is a page start address,
  170. * i.e., lstart cannot start in a hole;
  171. */
  172. if (cmp) {
  173. if (next)
  174. *plen = min(next - lstart, llen);
  175. goto out;
  176. }
  177. /*
  178. * lxd covered by xad
  179. */
  180. xad = &p->xad[index];
  181. xoff = offsetXAD(xad);
  182. xlen = lengthXAD(xad);
  183. xend = xoff + xlen;
  184. xaddr = addressXAD(xad);
  185. /* initialize new pxd */
  186. *pflag = xad->flag;
  187. *paddr = xaddr + (lstart - xoff);
  188. /* a page must be fully covered by an xad */
  189. *plen = min(xend - lstart, llen);
  190. out:
  191. XT_PUTPAGE(mp);
  192. return rc;
  193. }
  194. /*
  195. * xtLookupList()
  196. *
  197. * function: map a single logical extent into a list of physical extent;
  198. *
  199. * parameter:
  200. * struct inode *ip,
  201. * struct lxdlist *lxdlist, lxd list (in)
  202. * struct xadlist *xadlist, xad list (in/out)
  203. * int flag)
  204. *
  205. * coverage of lxd by xad under assumption of
  206. * . lxd's are ordered and disjoint.
  207. * . xad's are ordered and disjoint.
  208. *
  209. * return:
  210. * 0: success
  211. *
  212. * note: a page being written (even a single byte) is backed fully,
  213. * except the last page which is only backed with blocks
  214. * required to cover the last byte;
  215. * the extent backing a page is fully contained within an xad;
  216. */
int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
		 struct xadlist * xadlist, int flag)
{
	int rc = 0;
	struct btstack btstack;	/* traverse stack for xtSearch() */
	int cmp;
	s64 bn;
	struct metapage *mp;	/* currently pinned xtree page */
	xtpage_t *p;
	int index;		/* current slot in p */
	lxd_t *lxd;		/* current logical extent descriptor */
	xad_t *xad, *pxd;	/* current xad; next output slot */
	s64 size, lstart, lend, xstart, xend, pstart;
	s64 llen, xlen, plen;
	s64 xaddr, paddr;
	int nlxd, npxd, maxnpxd;

	/* output list starts empty; never emit more than maxnpxd entries */
	npxd = xadlist->nxad = 0;
	maxnpxd = xadlist->maxnxad;
	pxd = xadlist->xad;

	nlxd = lxdlist->nlxd;
	lxd = lxdlist->lxd;

	/* prime the first lxd's [lstart, lend) window */
	lstart = offsetLXD(lxd);
	llen = lengthLXD(lxd);
	lend = lstart + llen;

	/* file size in blocks, rounded up; lxds at/past EOF end the scan */
	size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/*
	 * search for the xad entry covering the logical extent
	 */
      search:
	if (lstart >= size)
		return 0;

	if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
		return rc;

	/*
	 * compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
//map:
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad on the next sibling page ? */
	if (index == le16_to_cpu(p->header.nextindex)) {
		/* root has no sibling: no more xads to scan */
		if (p->header.flag & BT_ROOT)
			goto mapend;

		if ((bn = le64_to_cpu(p->header.next)) == 0)
			goto mapend;

		XT_PUTPAGE(mp);

		/* get next sibling page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = XTENTRYSTART;
	}

	xad = &p->xad[index];

	/*
	 * is lxd covered by xad ?
	 */
      compare:
	/* decode the current xad's [xstart, xend) window */
	xstart = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xstart + xlen;
	xaddr = addressXAD(xad);

      compare1:
	if (xstart < lstart)
		goto compare2;

	/* (lstart <= xstart) */

	/* lxd is NOT covered by xad: lxd ends before xad begins */
	if (lend <= xstart) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/* compare with the current xad */
		goto compare1;
	}
	/* lxd is covered by xad */
	else {			/* (xstart < lend) */

		/* initialize new pxd: overlap begins at xad's start */
		pstart = xstart;
		plen = min(lend - xstart, xlen);
		paddr = xaddr;

		goto cover;
	}

	/* (xstart < lstart) */
      compare2:
	/* lxd is covered by xad: lxd begins inside xad's span */
	if (lstart < xend) {
		/* initialize new pxd: overlap begins at lxd's start */
		pstart = lstart;
		plen = min(xend - lstart, llen);
		paddr = xaddr + (lstart - xstart);

		goto cover;
	}
	/* lxd is NOT covered by xad: xad ends at or before lxd begins */
	else {			/* (xend <= lstart) */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then tree search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			/* last entry on this page: restart with a tree
			 * search from lstart rather than walking siblings */
			XT_PUTPAGE(mp);
			goto search;
		} else {
			index++;
			xad++;

			/* compare with new xad */
			goto compare;
		}
	}

	/*
	 * lxd is covered by xad and a new pxd has been initialized
	 * (lstart <= xstart < lend) or (xstart < lstart < xend)
	 */
      cover:
	/* finalize pxd corresponding to current xad */
	XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);

	/* stop once the caller-supplied output list is full */
	if (++npxd >= maxnpxd)
		goto mapend;
	pxd++;

	/*
	 * lxd is fully covered by xad
	 */
	if (lend <= xend) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/*
		 * test for old xad covering new lxd
		 * (old xstart < new lstart)
		 */
		goto compare2;
	}
	/*
	 * lxd is partially covered by xad
	 */
	else {			/* (xend < lend) */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then next xad page search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			if ((bn = le64_to_cpu(p->header.next)) == 0)
				goto mapend;

			XT_PUTPAGE(mp);

			/* get next sibling page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			index = XTENTRYSTART;
			xad = &p->xad[index];
		} else {
			index++;
			xad++;
		}

		/*
		 * test for new xad covering old lxd
		 * (old lstart < new xstart)
		 */
		goto compare;
	}

      mapend:
	/* report how many pxds were actually produced */
	xadlist->nxad = npxd;

//out:
	XT_PUTPAGE(mp);

	return rc;
}
  409. /*
  410. * xtSearch()
  411. *
  412. * function: search for the xad entry covering specified offset.
  413. *
  414. * parameters:
  415. * ip - file object;
  416. * xoff - extent offset;
  417. * nextp - address of next extent (if any) for search miss
  418. * cmpp - comparison result:
  419. * btstack - traverse stack;
  420. * flag - search process flag (XT_INSERT);
  421. *
  422. * returns:
  423. * btstack contains (bn, index) of search path traversed to the entry.
  424. * *cmpp is set to result of comparison with the entry returned.
  425. * the page containing the entry is pinned at exit.
  426. */
  427. static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
  428. int *cmpp, struct btstack * btstack, int flag)
  429. {
  430. struct jfs_inode_info *jfs_ip = JFS_IP(ip);
  431. int rc = 0;
  432. int cmp = 1; /* init for empty page */
  433. s64 bn; /* block number */
  434. struct metapage *mp; /* page buffer */
  435. xtpage_t *p; /* page */
  436. xad_t *xad;
  437. int base, index, lim, btindex;
  438. struct btframe *btsp;
  439. int nsplit = 0; /* number of pages to split */
  440. s64 t64;
  441. s64 next = 0;
  442. INCREMENT(xtStat.search);
  443. BT_CLR(btstack);
  444. btstack->nsplit = 0;
  445. /*
  446. * search down tree from root:
  447. *
  448. * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
  449. * internal page, child page Pi contains entry with k, Ki <= K < Kj.
  450. *
  451. * if entry with search key K is not found
  452. * internal page search find the entry with largest key Ki
  453. * less than K which point to the child page to search;
  454. * leaf page search find the entry with smallest key Kj
  455. * greater than K so that the returned index is the position of
  456. * the entry to be shifted right for insertion of new entry.
  457. * for empty tree, search key is greater than any key of the tree.
  458. *
  459. * by convention, root bn = 0.
  460. */
  461. for (bn = 0;;) {
  462. /* get/pin the page to search */
  463. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  464. if (rc)
  465. return rc;
  466. /* try sequential access heuristics with the previous
  467. * access entry in target leaf page:
  468. * once search narrowed down into the target leaf,
  469. * key must either match an entry in the leaf or
  470. * key entry does not exist in the tree;
  471. */
  472. //fastSearch:
  473. if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
  474. (p->header.flag & BT_LEAF) &&
  475. (index = jfs_ip->btindex) <
  476. le16_to_cpu(p->header.nextindex)) {
  477. xad = &p->xad[index];
  478. t64 = offsetXAD(xad);
  479. if (xoff < t64 + lengthXAD(xad)) {
  480. if (xoff >= t64) {
  481. *cmpp = 0;
  482. goto out;
  483. }
  484. /* stop sequential access heuristics */
  485. goto binarySearch;
  486. } else { /* (t64 + lengthXAD(xad)) <= xoff */
  487. /* try next sequential entry */
  488. index++;
  489. if (index <
  490. le16_to_cpu(p->header.nextindex)) {
  491. xad++;
  492. t64 = offsetXAD(xad);
  493. if (xoff < t64 + lengthXAD(xad)) {
  494. if (xoff >= t64) {
  495. *cmpp = 0;
  496. goto out;
  497. }
  498. /* miss: key falls between
  499. * previous and this entry
  500. */
  501. *cmpp = 1;
  502. next = t64;
  503. goto out;
  504. }
  505. /* (xoff >= t64 + lengthXAD(xad));
  506. * matching entry may be further out:
  507. * stop heuristic search
  508. */
  509. /* stop sequential access heuristics */
  510. goto binarySearch;
  511. }
  512. /* (index == p->header.nextindex);
  513. * miss: key entry does not exist in
  514. * the target leaf/tree
  515. */
  516. *cmpp = 1;
  517. goto out;
  518. }
  519. /*
  520. * if hit, return index of the entry found, and
  521. * if miss, where new entry with search key is
  522. * to be inserted;
  523. */
  524. out:
  525. /* compute number of pages to split */
  526. if (flag & XT_INSERT) {
  527. if (p->header.nextindex == /* little-endian */
  528. p->header.maxentry)
  529. nsplit++;
  530. else
  531. nsplit = 0;
  532. btstack->nsplit = nsplit;
  533. }
  534. /* save search result */
  535. btsp = btstack->top;
  536. btsp->bn = bn;
  537. btsp->index = index;
  538. btsp->mp = mp;
  539. /* update sequential access heuristics */
  540. jfs_ip->btindex = index;
  541. if (nextp)
  542. *nextp = next;
  543. INCREMENT(xtStat.fastSearch);
  544. return 0;
  545. }
  546. /* well, ... full search now */
  547. binarySearch:
  548. lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
  549. /*
  550. * binary search with search key K on the current page
  551. */
  552. for (base = XTENTRYSTART; lim; lim >>= 1) {
  553. index = base + (lim >> 1);
  554. XT_CMP(cmp, xoff, &p->xad[index], t64);
  555. if (cmp == 0) {
  556. /*
  557. * search hit
  558. */
  559. /* search hit - leaf page:
  560. * return the entry found
  561. */
  562. if (p->header.flag & BT_LEAF) {
  563. *cmpp = cmp;
  564. /* compute number of pages to split */
  565. if (flag & XT_INSERT) {
  566. if (p->header.nextindex ==
  567. p->header.maxentry)
  568. nsplit++;
  569. else
  570. nsplit = 0;
  571. btstack->nsplit = nsplit;
  572. }
  573. /* save search result */
  574. btsp = btstack->top;
  575. btsp->bn = bn;
  576. btsp->index = index;
  577. btsp->mp = mp;
  578. /* init sequential access heuristics */
  579. btindex = jfs_ip->btindex;
  580. if (index == btindex ||
  581. index == btindex + 1)
  582. jfs_ip->btorder = BT_SEQUENTIAL;
  583. else
  584. jfs_ip->btorder = BT_RANDOM;
  585. jfs_ip->btindex = index;
  586. return 0;
  587. }
  588. /* search hit - internal page:
  589. * descend/search its child page
  590. */
  591. if (index < le16_to_cpu(p->header.nextindex)-1)
  592. next = offsetXAD(&p->xad[index + 1]);
  593. goto next;
  594. }
  595. if (cmp > 0) {
  596. base = index + 1;
  597. --lim;
  598. }
  599. }
  600. /*
  601. * search miss
  602. *
  603. * base is the smallest index with key (Kj) greater than
  604. * search key (K) and may be zero or maxentry index.
  605. */
  606. if (base < le16_to_cpu(p->header.nextindex))
  607. next = offsetXAD(&p->xad[base]);
  608. /*
  609. * search miss - leaf page:
  610. *
  611. * return location of entry (base) where new entry with
  612. * search key K is to be inserted.
  613. */
  614. if (p->header.flag & BT_LEAF) {
  615. *cmpp = cmp;
  616. /* compute number of pages to split */
  617. if (flag & XT_INSERT) {
  618. if (p->header.nextindex ==
  619. p->header.maxentry)
  620. nsplit++;
  621. else
  622. nsplit = 0;
  623. btstack->nsplit = nsplit;
  624. }
  625. /* save search result */
  626. btsp = btstack->top;
  627. btsp->bn = bn;
  628. btsp->index = base;
  629. btsp->mp = mp;
  630. /* init sequential access heuristics */
  631. btindex = jfs_ip->btindex;
  632. if (base == btindex || base == btindex + 1)
  633. jfs_ip->btorder = BT_SEQUENTIAL;
  634. else
  635. jfs_ip->btorder = BT_RANDOM;
  636. jfs_ip->btindex = base;
  637. if (nextp)
  638. *nextp = next;
  639. return 0;
  640. }
  641. /*
  642. * search miss - non-leaf page:
  643. *
  644. * if base is non-zero, decrement base by one to get the parent
  645. * entry of the child page to search.
  646. */
  647. index = base ? base - 1 : base;
  648. /*
  649. * go down to child page
  650. */
  651. next:
  652. /* update number of pages to split */
  653. if (p->header.nextindex == p->header.maxentry)
  654. nsplit++;
  655. else
  656. nsplit = 0;
  657. /* push (bn, index) of the parent page/entry */
  658. if (BT_STACK_FULL(btstack)) {
  659. jfs_error(ip->i_sb, "stack overrun in xtSearch!");
  660. XT_PUTPAGE(mp);
  661. return -EIO;
  662. }
  663. BT_PUSH(btstack, bn, index);
  664. /* get the child page block number */
  665. bn = addressXAD(&p->xad[index]);
  666. /* unpin the parent page */
  667. XT_PUTPAGE(mp);
  668. }
  669. }
  670. /*
  671. * xtInsert()
  672. *
 * function:	insert a new XAD entry <xflag, xoff, xlen, xaddr> into
 *	the xtree leaf, splitting pages upward as required;
  674. *
  675. * parameter:
  676. * tid - transaction id;
  677. * ip - file object;
  678. * xflag - extent flag (XAD_NOTRECORDED):
  679. * xoff - extent offset;
  680. * xlen - extent length;
  681. * xaddrp - extent address pointer (in/out):
  682. * if (*xaddrp)
  683. * caller allocated data extent at *xaddrp;
  684. * else
  685. * allocate data extent and return its xaddr;
  686. * flag -
  687. *
  688. * return:
  689. */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 * search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both return (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out:
	 *
	 * cmp == 0 means xoff is already mapped; a non-zero <next> with
	 * xlen overrunning it means the new extent would overlap the
	 * following entry.  Either way the insert is rejected.
	 */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad (i.e., try to place the new extent
	 * right after the block range of the preceding entry)
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			/* undo quota charge on allocation failure */
			DQUOT_FREE_BLOCK(ip, xlen);
			goto out;
		}
	}

	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation (only if we, not the
			 * caller, allocated it above) */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				DQUOT_FREE_BLOCK(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 * insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		/* extend the logged (low-water-mark) range to cover the
		 * insertion point and everything shifted right of it */
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
  808. /*
  809. * xtSplitUp()
  810. *
  811. * function:
  812. * split full pages as propagating insertion up the tree
  813. *
  814. * parameter:
  815. * tid - transaction id;
  816. * ip - file object;
  817. * split - entry parameter descriptor;
  818. * btstack - traverse stack from xtSearch()
  819. *
  820. * return:
  821. */
static int
xtSplitUp(tid_t tid,
	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split page */
	struct metapage *rmp;
	s64 rbn;		/* new right page block number */
	struct metapage *rcmp;
	xtpage_t *rcp;		/* right child page */
	s64 rcbn;		/* right child page block number */
	int skip;		/* index of entry of insertion */
	int nextindex;		/* next available entry index of p */
	struct btframe *parent;	/* parent page entry on traverse stack */
	xad_t *xad;
	s64 xaddr;
	int xlen;
	int nsplit;		/* number of pages split */
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *xtlck;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	/* is inode xtree root extension/inline EA area free ?
	 *
	 * Cheap alternative to a real split: if the root still fits in the
	 * inode and the inline EA slots are unused, grow the root in place
	 * to XTROOTMAXSLOT entries and insert without splitting at all.
	 */
	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
	    (JFS_IP(ip)->mode2 & INLINEEA)) {
		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
		JFS_IP(ip)->mode2 &= ~INLINEEA;

		BT_MARK_DIRTY(smp, ip);
		/*
		 * acquire a transaction lock on the leaf page;
		 *
		 * action: xad insertion/extension;
		 */

		/* if insert into middle, shift right remaining entries. */
		skip = split->index;
		nextindex = le16_to_cpu(sp->header.nextindex);
		if (skip < nextindex)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(nextindex - skip) * sizeof(xad_t));

		/* insert the new entry: mark the entry NEW */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* advance next available entry index */
		le16_add_cpu(&sp->header.nextindex, 1);

		/* Don't log it if there are no links to the file */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(skip, (int)xtlck->lwm.offset) : skip;
			xtlck->lwm.length =
			    le16_to_cpu(sp->header.nextindex) -
			    xtlck->lwm.offset;
		}

		return 0;
	}

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * allocation hint: ?
	 *
	 * btstack->nsplit (computed during the search) bounds how many
	 * pages may split on the way up; pre-allocate one extent per
	 * potential split so the propagation loop cannot fail on
	 * allocation mid-way.
	 */
	if (split->pxdlist == NULL) {
		nsplit = btstack->nsplit;
		split->pxdlist = &pxdlist;
		pxdlist.maxnpxd = pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		xlen = JFS_SBI(ip->i_sb)->nbperpage;
		for (; nsplit > 0; nsplit--, pxd++) {
			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
			    == 0) {
				PXDaddress(pxd, xaddr);
				PXDlength(pxd, xlen);
				pxdlist.maxnpxd++;
				continue;
			}

			/* undo allocation */
			XT_PUTPAGE(smp);
			return rc;
		}
	}

	/*
	 * Split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * The split routines insert the new entry into the leaf page,
	 * and acquire txLock as appropriate.
	 * return <rp> pinned and its block number <rpbn>.
	 */
	rc = (sp->header.flag & BT_ROOT) ?
	    xtSplitRoot(tid, ip, split, &rmp) :
	    xtSplitPage(tid, ip, split, &rmp, &rbn);

	XT_PUTPAGE(smp);

	/* NOTE(review): the split routine's rc is collapsed to -EIO here,
	 * so e.g. -EDQUOT from xtSplitPage() is not propagated to the
	 * caller — verify this is intentional */
	if (rc)
		return -EIO;

	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 3 pages pinned at any time:
	 * right child, left parent and right parent (when the parent splits)
	 * to keep the child page pinned while working on the parent.
	 * make sure that all pins are released at exit.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages <rcp> pinned */
		rcmp = rmp;
		rcbn = rbn;
		rcp = XT_PAGE(ip, rcmp);

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			XT_PUTPAGE(rcmp);
			return rc;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
		 */
		skip = parent->index + 1;

		/*
		 * split or shift right remaining entries of the parent page
		 */
		nextindex = le16_to_cpu(sp->header.nextindex);
		/*
		 * parent page is full - split the parent page
		 */
		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
			/* init for parent page split: the router entry's
			 * key is the first key of the right child page */
			split->mp = smp;
			split->index = skip;	/* index at insert */
			split->flag = XAD_NEW;
			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
			split->len = JFS_SBI(ip->i_sb)->nbperpage;
			split->addr = rcbn;

			/* unpin previous right child page */
			XT_PUTPAGE(rcmp);

			/* The split routines insert the new entry,
			 * and acquire txLock as appropriate.
			 * return <rp> pinned and its block number <rpbn>.
			 */
			rc = (sp->header.flag & BT_ROOT) ?
			    xtSplitRoot(tid, ip, split, &rmp) :
			    xtSplitPage(tid, ip, split, &rmp, &rbn);
			if (rc) {
				XT_PUTPAGE(smp);
				return rc;
			}

			XT_PUTPAGE(smp);
			/* keep new child page <rp> pinned */
		}
		/*
		 * parent page is not full - insert in parent page
		 */
		else {
			/*
			 * insert router entry in parent for the right child
			 * page from the first entry of the right child page:
			 */
			/*
			 * acquire a transaction lock on the parent page;
			 *
			 * action: router xad insertion;
			 */
			BT_MARK_DIRTY(smp, ip);

			/*
			 * if insert into middle, shift right remaining entries
			 */
			if (skip < nextindex)
				memmove(&sp->xad[skip + 1], &sp->xad[skip],
					(nextindex -
					 skip) << L2XTSLOTSIZE);

			/* insert the router entry */
			xad = &sp->xad[skip];
			XT_PUTENTRY(xad, XAD_NEW,
				    offsetXAD(&rcp->xad[XTENTRYSTART]),
				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);

			/* advance next available entry index. */
			le16_add_cpu(&sp->header.nextindex, 1);

			/* Don't log it if there are no links to the file */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, smp,
					      tlckXTREE | tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(skip, (int)xtlck->lwm.offset) : skip;
				xtlck->lwm.length =
				    le16_to_cpu(sp->header.nextindex) -
				    xtlck->lwm.offset;
			}

			/* unpin parent page */
			XT_PUTPAGE(smp);

			/* exit propagate up */
			break;
		}
	}

	/* unpin current right page */
	XT_PUTPAGE(rmp);

	return 0;
}
  1041. /*
  1042. * xtSplitPage()
  1043. *
  1044. * function:
  1045. * split a full non-root page into
  1046. * original/split/left page and new right page
  1047. * i.e., the original/split page remains as left page.
  1048. *
  1049. * parameter:
  1050. * int tid,
  1051. * struct inode *ip,
  1052. * struct xtsplit *split,
  1053. * struct metapage **rmpp,
  1054. * u64 *rbnp,
  1055. *
  1056. * return:
 *	0 on success with *rmpp/*rbnp set to the new (pinned) right page
 *	and its block number; -EDQUOT or -EIO (or an XT_GETPAGE error) on
 *	failure, with the quota allocation rolled back.
  1058. */
static int
xtSplitPage(tid_t tid, struct inode *ip,
	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;		/* new right page allocated */
	s64 rbn;		/* new right page block number */
	struct metapage *mp;
	xtpage_t *p;
	s64 nextbn;
	int skip, maxentry, middle, righthalf, n;
	xad_t *xad;
	struct pxdlist *pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
	int quota_allocation = 0;	/* blocks charged; rolled back on error */

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	INCREMENT(xtStat.split);

	/* consume the next pre-allocated extent from the caller's pxdlist
	 * as the block range backing the new right page */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		rc = -EDQUOT;
		goto clean_up;
	}

	quota_allocation += lengthPXD(pxd);

	/*
	 * allocate the new right page for the split
	 */
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL) {
		rc = -EIO;
		goto clean_up;
	}

	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);

	BT_MARK_DIRTY(rmp, ip);
	/*
	 * action: new page;
	 */

	rp = (xtpage_t *) rmp->data;
	rp->header.self = *pxd;
	rp->header.flag = sp->header.flag & BT_TYPE;
	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);

	BT_MARK_DIRTY(smp, ip);
	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		/*
		 * acquire a transaction lock on the new right page;
		 */
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		rxtlck = (struct xtlock *) & tlck->lock;
		rxtlck->lwm.offset = XTENTRYSTART;
		/*
		 * acquire a transaction lock on the split page
		 */
		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
		sxtlck = (struct xtlock *) & tlck->lock;
	}

	/*
	 * initialize/update sibling pointers of <sp> and <rp>
	 */
	nextbn = le64_to_cpu(sp->header.next);
	rp->header.next = cpu_to_le64(nextbn);
	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
	sp->header.next = cpu_to_le64(rbn);

	skip = split->index;

	/*
	 * sequential append at tail (after last entry of last page)
	 *
	 * if splitting the last page on a level because of appending
	 * a entry to it (skip is maxentry), it's likely that the access is
	 * sequential. adding an empty page on the side of the level is less
	 * work and can push the fill factor much higher than normal.
	 * if we're wrong it's no big deal - we will do the split the right
	 * way next time.
	 * (it may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left, but it's not.
	 * Be my guest.)
	 */
	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
		/*
		 * acquire a transaction lock on the new/right page;
		 *
		 * action: xad insertion;
		 */
		/* insert entry at the first entry of the new right page */
		xad = &rp->xad[XTENTRYSTART];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

		if (!test_cflag(COMMIT_Nolink, ip)) {
			/* rxtlck->lwm.offset = XTENTRYSTART; */
			rxtlck->lwm.length = 1;
		}

		*rmpp = rmp;
		*rbnp = rbn;

		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
		return 0;
	}

	/*
	 * non-sequential insert (at possibly middle page)
	 */
	/*
	 * update previous pointer of old next/right page of <sp>
	 */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(rmp);
			goto clean_up;
		}

		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the next page;
		 *
		 * action:sibling pointer update;
		 */
		if (!test_cflag(COMMIT_Nolink, ip))
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		p->header.prev = cpu_to_le64(rbn);

		/* sibling page may have been updated previously, or
		 * it may be updated later;
		 */

		XT_PUTPAGE(mp);
	}

	/*
	 * split the data between the split and new/right pages
	 */
	maxentry = le16_to_cpu(sp->header.maxentry);
	middle = maxentry >> 1;
	righthalf = maxentry - middle;

	/*
	 * skip index in old split/left page - insert into left page:
	 */
	if (skip <= middle) {
		/* move right half of split page to the new right page */
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			righthalf << L2XTSLOTSIZE);

		/* shift right tail of left half to make room for new entry */
		if (skip < middle)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(middle - skip) << L2XTSLOTSIZE);

		/* insert new entry */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle + 1);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(skip, (int)sxtlck->lwm.offset) : skip;
		}

		rp->header.nextindex =
		    cpu_to_le16(XTENTRYSTART + righthalf);
	}
	/*
	 * skip index in new right page - insert into right page:
	 */
	else {
		/* move left head of right half to right page */
		n = skip - middle;
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			n << L2XTSLOTSIZE);

		/* insert new entry */
		n += XTENTRYSTART;
		xad = &rp->xad[n];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* move right tail of right half to right page */
		if (skip < maxentry)
			memmove(&rp->xad[n + 1], &sp->xad[skip],
				(maxentry - skip) << L2XTSLOTSIZE);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(middle, (int)sxtlck->lwm.offset) : middle;
		}

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
						   righthalf + 1);
	}

	if (!test_cflag(COMMIT_Nolink, ip)) {
		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
		    sxtlck->lwm.offset;

		/* rxtlck->lwm.offset = XTENTRYSTART; */
		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	*rmpp = rmp;
	*rbnp = rbn;

	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
	return rc;

      clean_up:

	/* Rollback quota allocation. */
	if (quota_allocation)
		DQUOT_FREE_BLOCK(ip, quota_allocation);

	return (rc);
}
  1265. /*
  1266. * xtSplitRoot()
  1267. *
  1268. * function:
  1269. * split the full root page into original/root/split page and new
  1270. * right page
  1271. * i.e., root remains fixed in tree anchor (inode) and the root is
  1272. * copied to a single new right child page since root page <<
  1273. * non-root page, and the split root page contains a single entry
  1274. * for the new right child page.
  1275. *
  1276. * parameter:
  1277. * int tid,
  1278. * struct inode *ip,
  1279. * struct xtsplit *split,
  1280. * struct metapage **rmpp)
  1281. *
  1282. * return:
 *	0 on success with *rmpp set to the new (pinned) right child page;
 *	-EIO or -EDQUOT on failure.
  1284. */
static int
xtSplitRoot(tid_t tid,
	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
{
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;
	s64 rbn;
	int skip, nextindex;
	xad_t *xad;
	pxd_t *pxd;
	struct pxdlist *pxdlist;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/* the root lives in-line in the in-memory inode */
	sp = &JFS_IP(ip)->i_xtroot;

	INCREMENT(xtStat.split);

	/*
	 * allocate a single (right) child page
	 *
	 * backed by the next pre-allocated extent in the caller's pxdlist
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		release_metapage(rmp);
		return -EDQUOT;
	}

	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);

	/*
	 * acquire a transaction lock on the new right page;
	 *
	 * action: new page;
	 */
	BT_MARK_DIRTY(rmp, ip);

	rp = (xtpage_t *) rmp->data;
	/* the child keeps the root's leaf/internal type */
	rp->header.flag =
	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
	rp->header.self = *pxd;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);

	/* initialize sibling pointers */
	rp->header.next = 0;
	rp->header.prev = 0;

	/*
	 * copy the in-line root page into new right page extent
	 */
	nextindex = le16_to_cpu(sp->header.maxentry);
	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);

	/*
	 * insert the new entry into the new right/child page
	 * (skip index in the new right page will not change)
	 */
	skip = split->index;
	/* if insert into middle, shift right remaining entries */
	if (skip != nextindex)
		memmove(&rp->xad[skip + 1], &rp->xad[skip],
			(nextindex - skip) * sizeof(xad_t));

	xad = &rp->xad[skip];
	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);

	/* update page header */
	rp->header.nextindex = cpu_to_le16(nextindex + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/*
	 * reset the root
	 *
	 * init root with the single entry for the new right page
	 * set the 1st entry offset to 0, which force the left-most key
	 * at any level of the tree to be less than any search key.
	 */
	/*
	 * acquire a transaction lock on the root page (in-memory inode);
	 *
	 * action: root split;
	 */
	BT_MARK_DIRTY(split->mp, ip);

	xad = &sp->xad[XTENTRYSTART];
	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);

	/* update page header of root: the root is now internal, holding
	 * exactly one router entry pointing at the new child */
	sp->header.flag &= ~BT_LEAF;
	sp->header.flag |= BT_INTERNAL;

	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = 1;
	}

	*rmpp = rmp;

	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
	return 0;
}
  1387. /*
  1388. * xtExtend()
  1389. *
  1390. * function: extend in-place;
  1391. *
  1392. * note: existing extent may or may not have been committed.
  1393. * caller is responsible for pager buffer cache update, and
  1394. * working block allocation map update;
  1395. * update pmap: alloc whole extended extent;
  1396. */
int xtExtend(tid_t tid,		/* transaction id */
	     struct inode *ip, s64 xoff,	/* delta extent offset */
	     s32 xlen,		/* delta extent length */
	     int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, len;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	s64 xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;

	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/* there must exist extent to be extended:
	 * search for xoff - 1, the last offset of the extent being grown */
	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
		return -EIO;
	}

	/* extension must be contiguous */
	xad = &p->xad[index];
	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
		return -EIO;
	}

	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* extend will overflow extent ?
	 * A single XAD cannot map more than MAXXLEN blocks; any overflow
	 * (len > 0) becomes a separate new entry. */
	xlen = lengthXAD(xad) + xlen;
	if ((len = xlen - MAXXLEN) <= 0)
		goto extendOld;

	/*
	 * extent overflow: insert entry for new extent
	 */
	//insertNew:
	xoff = offsetXAD(xad) + MAXXLEN;
	xaddr = addressXAD(xad) + MAXXLEN;
	nextindex = le16_to_cpu(p->header.nextindex);

	/*
	 * if the leaf page is full, insert the new entry and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = len;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			/* re-acquire the txLock: the old one was taken on
			 * the root page, not this child */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 * insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);

		/* advance next available entry index */
		le16_add_cpu(&p->header.nextindex, 1);
	}

	/* get back old entry */
	xad = &p->xad[index];
	xlen = MAXXLEN;

	/*
	 * extend old extent
	 */
      extendOld:
	XADlength(xad, xlen);
	if (!(xad->flag & XAD_NEW))
		xad->flag |= XAD_EXTENDED;

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
  1527. #ifdef _NOTYET
  1528. /*
  1529. * xtTailgate()
  1530. *
  1531. * function: split existing 'tail' extent
  1532. * (split offset >= start offset of tail extent), and
  1533. * relocate and extend the split tail half;
  1534. *
  1535. * note: existing extent may or may not have been committed.
  1536. * caller is responsible for pager buffer cache update, and
  1537. * working block allocation map update;
  1538. * update pmap: free old split tail extent, alloc new extent;
  1539. */
  1540. int xtTailgate(tid_t tid, /* transaction id */
  1541. struct inode *ip, s64 xoff, /* split/new extent offset */
  1542. s32 xlen, /* new extent length */
  1543. s64 xaddr, /* new extent address */
  1544. int flag)
  1545. {
  1546. int rc = 0;
  1547. int cmp;
  1548. struct metapage *mp; /* meta-page buffer */
  1549. xtpage_t *p; /* base B+-tree index page */
  1550. s64 bn;
  1551. int index, nextindex, llen, rlen;
  1552. struct btstack btstack; /* traverse stack */
  1553. struct xtsplit split; /* split information */
  1554. xad_t *xad;
  1555. struct tlock *tlck;
  1556. struct xtlock *xtlck = 0;
  1557. struct tlock *mtlck;
  1558. struct maplock *pxdlock;
  1559. /*
  1560. printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
  1561. (ulong)xoff, xlen, (ulong)xaddr);
  1562. */
  1563. /* there must exist extent to be tailgated */
  1564. if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
  1565. return rc;
  1566. /* retrieve search result */
  1567. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  1568. if (cmp != 0) {
  1569. XT_PUTPAGE(mp);
  1570. jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
  1571. return -EIO;
  1572. }
  1573. /* entry found must be last entry */
  1574. nextindex = le16_to_cpu(p->header.nextindex);
  1575. if (index != nextindex - 1) {
  1576. XT_PUTPAGE(mp);
  1577. jfs_error(ip->i_sb,
  1578. "xtTailgate: the entry found is not the last entry");
  1579. return -EIO;
  1580. }
  1581. BT_MARK_DIRTY(mp, ip);
  1582. /*
  1583. * acquire tlock of the leaf page containing original entry
  1584. */
  1585. if (!test_cflag(COMMIT_Nolink, ip)) {
  1586. tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
  1587. xtlck = (struct xtlock *) & tlck->lock;
  1588. }
  1589. /* completely replace extent ? */
  1590. xad = &p->xad[index];
  1591. /*
  1592. printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
  1593. (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
  1594. */
  1595. if ((llen = xoff - offsetXAD(xad)) == 0)
  1596. goto updateOld;
  1597. /*
  1598. * partially replace extent: insert entry for new extent
  1599. */
  1600. //insertNew:
  1601. /*
  1602. * if the leaf page is full, insert the new entry and
  1603. * propagate up the router entry for the new page from split
  1604. *
  1605. * The xtSplitUp() will insert the entry and unpin the leaf page.
  1606. */
  1607. if (nextindex == le16_to_cpu(p->header.maxentry)) {
  1608. /* xtSpliUp() unpins leaf pages */
  1609. split.mp = mp;
  1610. split.index = index + 1;
  1611. split.flag = XAD_NEW;
  1612. split.off = xoff; /* split offset */
  1613. split.len = xlen;
  1614. split.addr = xaddr;
  1615. split.pxdlist = NULL;
  1616. if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
  1617. return rc;
  1618. /* get back old page */
  1619. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  1620. if (rc)
  1621. return rc;
  1622. /*
  1623. * if leaf root has been split, original root has been
  1624. * copied to new child page, i.e., original entry now
  1625. * resides on the new child page;
  1626. */
  1627. if (p->header.flag & BT_INTERNAL) {
  1628. ASSERT(p->header.nextindex ==
  1629. cpu_to_le16(XTENTRYSTART + 1));
  1630. xad = &p->xad[XTENTRYSTART];
  1631. bn = addressXAD(xad);
  1632. XT_PUTPAGE(mp);
  1633. /* get new child page */
  1634. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  1635. if (rc)
  1636. return rc;
  1637. BT_MARK_DIRTY(mp, ip);
  1638. if (!test_cflag(COMMIT_Nolink, ip)) {
  1639. tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
  1640. xtlck = (struct xtlock *) & tlck->lock;
  1641. }
  1642. }
  1643. }
  1644. /*
  1645. * insert the new entry into the leaf page
  1646. */
  1647. else {
  1648. /* insert the new entry: mark the entry NEW */
  1649. xad = &p->xad[index + 1];
  1650. XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
  1651. /* advance next available entry index */
  1652. le16_add_cpu(&p->header.nextindex, 1);
  1653. }
  1654. /* get back old XAD */
  1655. xad = &p->xad[index];
  1656. /*
  1657. * truncate/relocate old extent at split offset
  1658. */
  1659. updateOld:
  1660. /* update dmap for old/committed/truncated extent */
  1661. rlen = lengthXAD(xad) - llen;
  1662. if (!(xad->flag & XAD_NEW)) {
  1663. /* free from PWMAP at commit */
  1664. if (!test_cflag(COMMIT_Nolink, ip)) {
  1665. mtlck = txMaplock(tid, ip, tlckMAP);
  1666. pxdlock = (struct maplock *) & mtlck->lock;
  1667. pxdlock->flag = mlckFREEPXD;
  1668. PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
  1669. PXDlength(&pxdlock->pxd, rlen);
  1670. pxdlock->index = 1;
  1671. }
  1672. } else
  1673. /* free from WMAP */
  1674. dbFree(ip, addressXAD(xad) + llen, (s64) rlen);
  1675. if (llen)
  1676. /* truncate */
  1677. XADlength(xad, llen);
  1678. else
  1679. /* replace */
  1680. XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
  1681. if (!test_cflag(COMMIT_Nolink, ip)) {
  1682. xtlck->lwm.offset = (xtlck->lwm.offset) ?
  1683. min(index, (int)xtlck->lwm.offset) : index;
  1684. xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
  1685. xtlck->lwm.offset;
  1686. }
  1687. /* unpin the leaf page */
  1688. XT_PUTPAGE(mp);
  1689. return rc;
  1690. }
  1691. #endif /* _NOTYET */
/*
 *	xtUpdate()
 *
 * function: update XAD;
 *
 *	update extent for allocated_but_not_recorded or
 *	compressed extent;
 *
 * parameter:
 *	nxad	- new XAD;
 *		logical extent of the specified XAD must be completely
 *		contained by an existing XAD;
 *
 * The update may coalesce nXAD with the left and/or right neighbour
 * entry, replace the old XAD outright, or split the old XAD into up to
 * three pieces (lXAD, nXAD, rXAD), inserting new entries and splitting
 * leaf pages as needed.
 *
 * return: 0 on success; -EIO on tree inconsistency; errors from
 *	xtSplitUp()/xtSearch() propagated as-is.
 */
int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
{				/* new XAD */
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index0, index, newindex, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad, *lxad, *rxad;
	int xflag;
	s64 nxoff, xoff;
	int nxlen, xlen, lxlen, rxlen;
	s64 nxaddr, xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;
	int newpage = 0;

	/* there must exist extent to be tailgated */
	nxoff = offsetXAD(nxad);
	nxlen = lengthXAD(nxad);
	nxaddr = addressXAD(nxad);

	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result; leaf page comes back pinned */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	xad = &p->xad[index0];
	xflag = xad->flag;
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xaddr = addressXAD(xad);

	/* nXAD must be completely contained within XAD */
	if ((xoff > nxoff) ||
	    (nxoff + nxlen > xoff + xlen)) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "xtUpdate: nXAD in not completely contained within XAD");
		return -EIO;
	}

	index = index0;
	newindex = index + 1;
	nextindex = le16_to_cpu(p->header.nextindex);

#ifdef _JFS_WIP_NOCOALESCE
	if (xoff < nxoff)
		goto updateRight;

	/*
	 * replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto out;
	} else			/* (nxlen < xlen) */
		goto updateLeft;
#endif				/* _JFS_WIP_NOCOALESCE */

/* #ifdef _JFS_WIP_COALESCE */
	if (xoff < nxoff)
		goto coalesceRight;

	/*
	 * coalesce with left XAD
	 */
//coalesceLeft: /* (xoff == nxoff) */
	/* is XAD first entry of page ? */
	if (index == XTENTRYSTART)
		goto replace;

	/* is nXAD logically and physically contiguous with lXAD ? */
	lxad = &p->xad[index - 1];
	lxlen = lengthXAD(lxad);
	if (!(lxad->flag & XAD_NOTRECORDED) &&
	    (nxoff == offsetXAD(lxad) + lxlen) &&
	    (nxaddr == addressXAD(lxad) + lxlen) &&
	    (lxlen + nxlen < MAXXLEN)) {
		/* extend right lXAD */
		index0 = index - 1;
		XADlength(lxad, lxlen + nxlen);

		/* If we just merged two extents together, need to make sure the
		 * right extent gets logged.  If the left one is marked XAD_NEW,
		 * then we know it will be logged.  Otherwise, mark as
		 * XAD_EXTENDED
		 */
		if (!(lxad->flag & XAD_NEW))
			lxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen) {
			/* truncate XAD: chop nxlen blocks off its front */
			XADoffset(xad, xoff + nxlen);
			XADlength(xad, xlen - nxlen);
			XADaddress(xad, xaddr + nxlen);
			goto out;
		} else {	/* (xlen == nxlen) */

			/* remove XAD fully absorbed by lXAD:
			 * compact trailing entries left by one slot */
			if (index < nextindex - 1)
				memmove(&p->xad[index], &p->xad[index + 1],
					(nextindex - index -
					 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);

			/* reorient the merged lXAD as the current XAD */
			index = index0;
			newindex = index + 1;
			nextindex = le16_to_cpu(p->header.nextindex);
			xoff = nxoff = offsetXAD(lxad);
			xlen = nxlen = lxlen + nxlen;
			xaddr = nxaddr = addressXAD(lxad);
			goto coalesceRight;
		}
	}

	/*
	 * replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto coalesceRight;
	} else			/* (nxlen < xlen) */
		goto updateLeft;

	/*
	 * coalesce with right XAD
	 */
      coalesceRight:		/* (xoff <= nxoff) */
	/* is XAD last entry of page ? */
	if (newindex == nextindex) {
		if (xoff == nxoff)
			goto out;
		goto updateRight;
	}

	/* is nXAD logically and physically contiguous with rXAD ? */
	rxad = &p->xad[index + 1];
	rxlen = lengthXAD(rxad);
	if (!(rxad->flag & XAD_NOTRECORDED) &&
	    (nxoff + nxlen == offsetXAD(rxad)) &&
	    (nxaddr + nxlen == addressXAD(rxad)) &&
	    (rxlen + nxlen < MAXXLEN)) {
		/* extend left rXAD to absorb nXAD */
		XADoffset(rxad, nxoff);
		XADlength(rxad, rxlen + nxlen);
		XADaddress(rxad, nxaddr);

		/* If we just merged two extents together, need to make sure
		 * the left extent gets logged.  If the right one is marked
		 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
		 * XAD_EXTENDED
		 */
		if (!(rxad->flag & XAD_NEW))
			rxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen)
			/* truncate XAD: drop the nxlen-block tail */
			XADlength(xad, xlen - nxlen);
		else {		/* (xlen == nxlen) */

			/* remove XAD fully absorbed by rXAD */
			memmove(&p->xad[index], &p->xad[index + 1],
				(nextindex - index - 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);
		}

		goto out;
	} else if (xoff == nxoff)
		goto out;

	/* sanity: at this point xoff must be strictly less than nxoff */
	if (xoff >= nxoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff");
		return -EIO;
	}
/* #endif _JFS_WIP_COALESCE */

	/*
	 * split XAD into (lXAD, nXAD):
	 *
	 *          |---nXAD--->
	 * --|----------XAD----------|--
	 *   |-lXAD-|
	 */
      updateRight:		/* (xoff < nxoff) */
	/* truncate old XAD as lXAD:not_recorded */
	xad = &p->xad[index];
	XADlength(xad, nxoff - xoff);

	/* insert nXAD:recorded */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {

		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = newindex;
		split.flag = xflag & ~XAD_NOTRECORDED;
		split.off = nxoff;
		split.len = nxlen;
		split.addr = nxaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			/* re-acquire the tlock against the new child page */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		} else {
			/* is nXAD on new page ?  (split moved the upper half
			 * of the entries to the new right sibling) */
			if (newindex >
			    (le16_to_cpu(p->header.maxentry) >> 1)) {
				newindex =
				    newindex -
				    le16_to_cpu(p->header.nextindex) +
				    XTENTRYSTART;
				newpage = 1;
			}
		}
	} else {
		/* if insert into middle, shift right remaining entries */
		if (newindex < nextindex)
			memmove(&p->xad[newindex + 1], &p->xad[newindex],
				(nextindex - newindex) << L2XTSLOTSIZE);

		/* insert the entry */
		xad = &p->xad[newindex];
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		/* advance next available entry index. */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/*
	 * does nXAD force 3-way split ?
	 *
	 *          |---nXAD--->|
	 * --|----------XAD-------------|--
	 *   |-lXAD-|           |-rXAD -|
	 */
	if (nxoff + nxlen == xoff + xlen)
		goto out;

	/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
	if (newpage) {
		/* close out old page: record its logging window before
		 * moving the working page to the new right sibling */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(index0, (int)xtlck->lwm.offset) : index0;
			xtlck->lwm.length =
			    le16_to_cpu(p->header.nextindex) -
			    xtlck->lwm.offset;
		}

		bn = le64_to_cpu(p->header.next);
		XT_PUTPAGE(mp);

		/* get new right page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		BT_MARK_DIRTY(mp, ip);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
		}

		index0 = index = newindex;
	} else
		index++;

	newindex = index + 1;
	nextindex = le16_to_cpu(p->header.nextindex);
	xlen = xlen - (nxoff - xoff);
	xoff = nxoff;
	xaddr = nxaddr;

	/* recompute split pages: if the current page is full, re-search so
	 * btstack reflects the page that will actually be split */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		XT_PUTPAGE(mp);

		if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
			return rc;

		/* retrieve search result */
		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);

		if (cmp != 0) {
			XT_PUTPAGE(mp);
			jfs_error(ip->i_sb, "xtUpdate: xtSearch failed");
			return -EIO;
		}

		if (index0 != index) {
			XT_PUTPAGE(mp);
			jfs_error(ip->i_sb,
				  "xtUpdate: unexpected value of index");
			return -EIO;
		}
	}

	/*
	 * split XAD into (nXAD, rXAD)
	 *
	 *          ---nXAD---|
	 * --|----------XAD----------|--
	 *                    |-rXAD-|
	 */
      updateLeft:		/* (nxoff == xoff) && (nxlen < xlen) */
	/* update old XAD with nXAD:recorded */
	xad = &p->xad[index];
	*xad = *nxad;
	xad->flag = xflag & ~XAD_NOTRECORDED;

	/* insert rXAD:not_recorded */
	xoff = xoff + nxlen;
	xlen = xlen - nxlen;
	xaddr = xaddr + nxlen;
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/*
		   printf("xtUpdate.updateLeft.split p:0x%p\n", p);
		 */
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = newindex;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			/* re-acquire the tlock against the new child page */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	} else {
		/* if insert into middle, shift right remaining entries */
		if (newindex < nextindex)
			memmove(&p->xad[newindex + 1], &p->xad[newindex],
				(nextindex - newindex) << L2XTSLOTSIZE);

		/* insert the entry */
		xad = &p->xad[newindex];
		XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

		/* advance next available entry index. */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

      out:
	/* widen the tlock's logging window over the modified entries */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset = (xtlck->lwm.offset) ?
		    min(index0, (int)xtlck->lwm.offset) : index0;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
/*
 *	xtAppend()
 *
 * function: grow in append mode from contiguous region specified ;
 *
 * parameter:
 *	tid		- transaction id;
 *	ip		- file object;
 *	xflag		- extent flag:
 *	xoff		- extent offset;
 *	maxblocks	- max extent length;
 *	xlenp		- extent length (in/out);
 *	xaddrp		- extent address pointer (in/out):
 *	flag		-
 *
 * return: 0 on success (with *xlenp/*xaddrp updated to the length and
 *	address actually allocated); -EEXIST if an extent already exists
 *	at xoff; errors from xtSearch()/dbAllocBottomUp()/xtSplitUp()
 *	propagated as-is.
 */
int xtAppend(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
	     s32 * xlenp,	/* (in/out) */
	     s64 * xaddrp,	/* (in/out) */
	     int flag)
{
	int rc = 0;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn, xaddr;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int nsplit, nblocks, xlen;
	struct pxdlist pxdlist;
	pxd_t *pxd;
	s64 next;

	xaddr = *xaddrp;
	xlen = *xlenp;
	jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
		 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);

	/*
	 * search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp == 0) {
		rc = -EEXIST;
		goto out;
	}

	/* clamp the requested length so it does not run into the next
	 * existing extent */
	if (next)
		xlen = min(xlen, (int)(next - xoff));
//insert:
	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex < le16_to_cpu(p->header.maxentry))
		goto insertLeaf;

	/*
	 * allocate new index blocks to cover index page split(s):
	 * carve nsplit page-sized pxds off the front of the caller's
	 * contiguous region, advancing xaddr/shrinking maxblocks as we go
	 */
	nsplit = btstack.nsplit;
	split.pxdlist = &pxdlist;
	pxdlist.maxnpxd = pxdlist.npxd = 0;
	pxd = &pxdlist.pxd[0];
	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
			PXDaddress(pxd, xaddr);
			PXDlength(pxd, nblocks);
			pxdlist.maxnpxd++;
			continue;
		}

		/* undo allocation */
		/* NOTE(review): despite the comment, blocks already allocated
		 * into pxdlist in earlier iterations are not freed here —
		 * verify against the transaction/abort path before relying
		 * on this as a complete undo */
		goto out;
	}

	xlen = min(xlen, maxblocks);

	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	split.mp = mp;
	split.index = index;
	split.flag = xflag;
	split.off = xoff;
	split.len = xlen;
	split.addr = xaddr;
	if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
		/* undo data extent allocation */
		dbFree(ip, *xaddrp, (s64) * xlenp);

		return rc;
	}

	*xaddrp = xaddr;
	*xlenp = xlen;
	return 0;

	/*
	 *	insert the new entry into the leaf page
	 */
      insertLeaf:
	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
	xtlck = (struct xtlock *) & tlck->lock;

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* widen the tlock's logging window over the inserted entry */
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
	    xtlck->lwm.offset;

	*xaddrp = xaddr;
	*xlenp = xlen;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
  2236. #ifdef _STILL_TO_PORT
/* - TBD for defragmentation/reorganization -
 *
 *	xtDelete()
 *
 * function:
 *	delete the entry with the specified key.
 *
 *	N.B.: whole extent of the entry is assumed to be deleted.
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xoff	- offset of the extent to delete;
 *	xlen	- length (unused in the body below);
 *	flag	- unused;
 *
 * return:
 *	ENOENT: if the entry is not found.
 *
 * exception:
 */
int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index, nextindex;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/*
	 * find the matching entry; xtSearch() pins the page
	 */
	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
		return rc;

	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
	if (cmp) {
		/* unpin the leaf page */
		XT_PUTPAGE(mp);
		return -ENOENT;
	}

	/*
	 * delete the entry from the leaf page
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	le16_add_cpu(&p->header.nextindex, -1);

	/*
	 * if the leaf page becomes empty, free the page
	 * (xtDeleteUp() takes over ownership of the pinned page)
	 */
	if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
		return (xtDeleteUp(tid, ip, mp, p, &btstack));

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action:xad deletion;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE);
	xtlck = (struct xtlock *) & tlck->lock;
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;

	/* if delete from middle, shift left/compact the remaining entries */
	if (index < nextindex - 1)
		memmove(&p->xad[index], &p->xad[index + 1],
			(nextindex - index - 1) * sizeof(xad_t));

	XT_PUTPAGE(mp);

	return 0;
}
/* - TBD for defragmentation/reorganization -
 *
 *	xtDeleteUp()
 *
 * function:
 *	free empty pages as propagating deletion up the tree
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	fmp	- pinned metapage of the empty leaf (ownership transferred:
 *		  it is either kept as root or freed/discarded here);
 *	fp	- the empty leaf page itself;
 *	btstack	- traverse stack recorded by the preceding xtSearch();
 *
 * return: 0 on success; errors from xtRelink()/XT_GETPAGE propagated.
 */
static int
xtDeleteUp(tid_t tid, struct inode *ip,
	   struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *mp;
	xtpage_t *p;
	int index, nextindex;
	s64 xaddr;
	int xlen;
	struct btframe *parent;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/*
	 * keep root leaf page which has become empty
	 */
	if (fp->header.flag & BT_ROOT) {
		/* keep the root page: reset it to an empty leaf */
		fp->header.flag &= ~BT_INTERNAL;
		fp->header.flag |= BT_LEAF;
		fp->header.nextindex = cpu_to_le16(XTENTRYSTART);

		/* XT_PUTPAGE(fmp); */

		return 0;
	}

	/*
	 * free non-root leaf page
	 */
	if ((rc = xtRelink(tid, ip, fp))) {
		XT_PUTPAGE(fmp);
		return rc;
	}

	xaddr = addressPXD(&fp->header.self);
	xlen = lengthPXD(&fp->header.self);
	/* free the page extent */
	dbFree(ip, xaddr, (s64) xlen);

	/* free the buffer page */
	discard_metapage(fmp);

	/*
	 * propagate page deletion up the index tree
	 *
	 * If the delete from the parent page makes it empty,
	 * continue all the way up the tree.
	 * stop if the root page is reached (which is never deleted) or
	 * if the entry deletion does not empty the page.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = parent->index;

		/* delete the entry for the freed child page from parent.
		 */
		nextindex = le16_to_cpu(p->header.nextindex);

		/*
		 * the parent has the single entry being deleted:
		 * free the parent page which has become empty.
		 */
		if (nextindex == 1) {
			if (p->header.flag & BT_ROOT) {
				/* keep the root page: reset to empty leaf */
				p->header.flag &= ~BT_INTERNAL;
				p->header.flag |= BT_LEAF;
				p->header.nextindex =
				    cpu_to_le16(XTENTRYSTART);

				/* XT_PUTPAGE(mp); */

				break;
			} else {
				/* free the parent page */
				if ((rc = xtRelink(tid, ip, p)))
					return rc;

				xaddr = addressPXD(&p->header.self);
				/* free the page extent */
				dbFree(ip, xaddr,
				       (s64) JFS_SBI(ip->i_sb)->nbperpage);

				/* unpin/free the buffer page */
				discard_metapage(mp);

				/* propagate up */
				continue;
			}
		}
		/*
		 * the parent has other entries remaining:
		 * delete the router entry from the parent page.
		 */
		else {
			BT_MARK_DIRTY(mp, ip);
			/*
			 * acquire a transaction lock on the leaf page;
			 *
			 * action:xad deletion;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset =
			    (xtlck->lwm.offset) ? min(index,
						      xtlck->lwm.
						      offset) : index;

			/* if delete from middle,
			 * shift left/compact the remaining entries in the page
			 */
			if (index < nextindex - 1)
				memmove(&p->xad[index], &p->xad[index + 1],
					(nextindex - index -
					 1) << L2XTSLOTSIZE);

			le16_add_cpu(&p->header.nextindex, -1);
			jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
				 (ulong) parent->bn, index);
		}

		/* unpin the parent page */
		XT_PUTPAGE(mp);

		/* exit propagation up */
		break;
	}

	return 0;
}
  2429. /*
  2430. * NAME: xtRelocate()
  2431. *
  2432. * FUNCTION: relocate xtpage or data extent of regular file;
  2433. * This function is mainly used by defragfs utility.
  2434. *
  2435. * NOTE: This routine does not have the logic to handle
  2436. * uncommitted allocated extent. The caller should call
  2437. * txCommit() to commit all the allocation before call
  2438. * this routine.
  2439. */
  2440. int
  2441. xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
  2442. s64 nxaddr, /* new xaddr */
  2443. int xtype)
  2444. { /* extent type: XTPAGE or DATAEXT */
  2445. int rc = 0;
  2446. struct tblock *tblk;
  2447. struct tlock *tlck;
  2448. struct xtlock *xtlck;
  2449. struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */
  2450. xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */
  2451. xad_t *xad;
  2452. pxd_t *pxd;
  2453. s64 xoff, xsize;
  2454. int xlen;
  2455. s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
  2456. cbuf_t *cp;
  2457. s64 offset, nbytes, nbrd, pno;
  2458. int nb, npages, nblks;
  2459. s64 bn;
  2460. int cmp;
  2461. int index;
  2462. struct pxd_lock *pxdlock;
  2463. struct btstack btstack; /* traverse stack */
  2464. xtype = xtype & EXTENT_TYPE;
  2465. xoff = offsetXAD(oxad);
  2466. oxaddr = addressXAD(oxad);
  2467. xlen = lengthXAD(oxad);
  2468. /* validate extent offset */
  2469. offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
  2470. if (offset >= ip->i_size)
  2471. return -ESTALE; /* stale extent */
  2472. jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
  2473. xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
  2474. /*
  2475. * 1. get and validate the parent xtpage/xad entry
  2476. * covering the source extent to be relocated;
  2477. */
  2478. if (xtype == DATAEXT) {
  2479. /* search in leaf entry */
  2480. rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
  2481. if (rc)
  2482. return rc;
  2483. /* retrieve search result */
  2484. XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
  2485. if (cmp) {
  2486. XT_PUTPAGE(pmp);
  2487. return -ESTALE;
  2488. }
  2489. /* validate for exact match with a single entry */
  2490. xad = &pp->xad[index];
  2491. if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
  2492. XT_PUTPAGE(pmp);
  2493. return -ESTALE;
  2494. }
  2495. } else { /* (xtype == XTPAGE) */
  2496. /* search in internal entry */
  2497. rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
  2498. if (rc)
  2499. return rc;
  2500. /* retrieve search result */
  2501. XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
  2502. if (cmp) {
  2503. XT_PUTPAGE(pmp);
  2504. return -ESTALE;
  2505. }
  2506. /* xtSearchNode() validated for exact match with a single entry
  2507. */
  2508. xad = &pp->xad[index];
  2509. }
  2510. jfs_info("xtRelocate: parent xad entry validated.");
  2511. /*
  2512. * 2. relocate the extent
  2513. */
  2514. if (xtype == DATAEXT) {
  2515. /* if the extent is allocated-but-not-recorded
  2516. * there is no real data to be moved in this extent,
  2517. */
  2518. if (xad->flag & XAD_NOTRECORDED)
  2519. goto out;
  2520. else
  2521. /* release xtpage for cmRead()/xtLookup() */
  2522. XT_PUTPAGE(pmp);
  2523. /*
  2524. * cmRelocate()
  2525. *
  2526. * copy target data pages to be relocated;
  2527. *
  2528. * data extent must start at page boundary and
  2529. * multiple of page size (except the last data extent);
  2530. * read in each page of the source data extent into cbuf,
  2531. * update the cbuf extent descriptor of the page to be
  2532. * homeward bound to new dst data extent
  2533. * copy the data from the old extent to new extent.
  2534. * copy is essential for compressed files to avoid problems
  2535. * that can arise if there was a change in compression
  2536. * algorithms.
  2537. * it is a good strategy because it may disrupt cache
  2538. * policy to keep the pages in memory afterwards.
  2539. */
  2540. offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
  2541. assert((offset & CM_OFFSET) == 0);
  2542. nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
  2543. pno = offset >> CM_L2BSIZE;
  2544. npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
  2545. /*
  2546. npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
  2547. (offset >> CM_L2BSIZE) + 1;
  2548. */
  2549. sxaddr = oxaddr;
  2550. dxaddr = nxaddr;
  2551. /* process the request one cache buffer at a time */
  2552. for (nbrd = 0; nbrd < nbytes; nbrd += nb,
  2553. offset += nb, pno++, npages--) {
  2554. /* compute page size */
  2555. nb = min(nbytes - nbrd, CM_BSIZE);
  2556. /* get the cache buffer of the page */
  2557. if (rc = cmRead(ip, offset, npages, &cp))
  2558. break;
  2559. assert(addressPXD(&cp->cm_pxd) == sxaddr);
  2560. assert(!cp->cm_modified);
  2561. /* bind buffer with the new extent address */
  2562. nblks = nb >> JFS_IP(ip->i_sb)->l2bsize;
  2563. cmSetXD(ip, cp, pno, dxaddr, nblks);
  2564. /* release the cbuf, mark it as modified */
  2565. cmPut(cp, true);
  2566. dxaddr += nblks;
  2567. sxaddr += nblks;
  2568. }
  2569. /* get back parent page */
  2570. if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
  2571. return rc;
  2572. XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
  2573. jfs_info("xtRelocate: target data extent relocated.");
  2574. } else { /* (xtype == XTPAGE) */
  2575. /*
  2576. * read in the target xtpage from the source extent;
  2577. */
  2578. XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
  2579. if (rc) {
  2580. XT_PUTPAGE(pmp);
  2581. return rc;
  2582. }
  2583. /*
  2584. * read in sibling pages if any to update sibling pointers;
  2585. */
  2586. rmp = NULL;
  2587. if (p->header.next) {
  2588. nextbn = le64_to_cpu(p->header.next);
  2589. XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
  2590. if (rc) {
  2591. XT_PUTPAGE(pmp);
  2592. XT_PUTPAGE(mp);
  2593. return (rc);
  2594. }
  2595. }
  2596. lmp = NULL;
  2597. if (p->header.prev) {
  2598. prevbn = le64_to_cpu(p->header.prev);
  2599. XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
  2600. if (rc) {
  2601. XT_PUTPAGE(pmp);
  2602. XT_PUTPAGE(mp);
  2603. if (rmp)
  2604. XT_PUTPAGE(rmp);
  2605. return (rc);
  2606. }
  2607. }
  2608. /* at this point, all xtpages to be updated are in memory */
  2609. /*
  2610. * update sibling pointers of sibling xtpages if any;
  2611. */
  2612. if (lmp) {
  2613. BT_MARK_DIRTY(lmp, ip);
  2614. tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
  2615. lp->header.next = cpu_to_le64(nxaddr);
  2616. XT_PUTPAGE(lmp);
  2617. }
  2618. if (rmp) {
  2619. BT_MARK_DIRTY(rmp, ip);
  2620. tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
  2621. rp->header.prev = cpu_to_le64(nxaddr);
  2622. XT_PUTPAGE(rmp);
  2623. }
  2624. /*
  2625. * update the target xtpage to be relocated
  2626. *
  2627. * update the self address of the target page
  2628. * and write to destination extent;
  2629. * redo image covers the whole xtpage since it is new page
  2630. * to the destination extent;
  2631. * update of bmap for the free of source extent
  2632. * of the target xtpage itself:
  2633. * update of bmap for the allocation of destination extent
  2634. * of the target xtpage itself:
  2635. * update of bmap for the extents covered by xad entries in
  2636. * the target xtpage is not necessary since they are not
  2637. * updated;
  2638. * if not committed before this relocation,
  2639. * target page may contain XAD_NEW entries which must
  2640. * be scanned for bmap update (logredo() always
  2641. * scan xtpage REDOPAGE image for bmap update);
  2642. * if committed before this relocation (tlckRELOCATE),
  2643. * scan may be skipped by commit() and logredo();
  2644. */
  2645. BT_MARK_DIRTY(mp, ip);
  2646. /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
  2647. tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
  2648. xtlck = (struct xtlock *) & tlck->lock;
  2649. /* update the self address in the xtpage header */
  2650. pxd = &p->header.self;
  2651. PXDaddress(pxd, nxaddr);
  2652. /* linelock for the after image of the whole page */
  2653. xtlck->lwm.length =
  2654. le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
  2655. /* update the buffer extent descriptor of target xtpage */
  2656. xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
  2657. bmSetXD(mp, nxaddr, xsize);
  2658. /* unpin the target page to new homeward bound */
  2659. XT_PUTPAGE(mp);
  2660. jfs_info("xtRelocate: target xtpage relocated.");
  2661. }
  2662. /*
  2663. * 3. acquire maplock for the source extent to be freed;
  2664. *
  2665. * acquire a maplock saving the src relocated extent address;
  2666. * to free of the extent at commit time;
  2667. */
  2668. out:
  2669. /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
  2670. * free PXD of the source data extent (logredo() will update
  2671. * bmap for free of source data extent), and update bmap for
  2672. * free of the source data extent;
  2673. */
  2674. if (xtype == DATAEXT)
  2675. tlck = txMaplock(tid, ip, tlckMAP);
  2676. /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
  2677. * for the source xtpage (logredo() will init NoRedoPage
  2678. * filter and will also update bmap for free of the source
  2679. * xtpage), and update bmap for free of the source xtpage;
  2680. * N.B. We use tlckMAP instead of tlkcXTREE because there
  2681. * is no buffer associated with this lock since the buffer
  2682. * has been redirected to the target location.
  2683. */
  2684. else /* (xtype == XTPAGE) */
  2685. tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
  2686. pxdlock = (struct pxd_lock *) & tlck->lock;
  2687. pxdlock->flag = mlckFREEPXD;
  2688. PXDaddress(&pxdlock->pxd, oxaddr);
  2689. PXDlength(&pxdlock->pxd, xlen);
  2690. pxdlock->index = 1;
  2691. /*
  2692. * 4. update the parent xad entry for relocation;
  2693. *
  2694. * acquire tlck for the parent entry with XAD_NEW as entry
  2695. * update which will write LOG_REDOPAGE and update bmap for
  2696. * allocation of XAD_NEW destination extent;
  2697. */
  2698. jfs_info("xtRelocate: update parent xad entry.");
  2699. BT_MARK_DIRTY(pmp, ip);
  2700. tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
  2701. xtlck = (struct xtlock *) & tlck->lock;
  2702. /* update the XAD with the new destination extent; */
  2703. xad = &pp->xad[index];
  2704. xad->flag |= XAD_NEW;
  2705. XADaddress(xad, nxaddr);
  2706. xtlck->lwm.offset = min(index, xtlck->lwm.offset);
  2707. xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
  2708. xtlck->lwm.offset;
  2709. /* unpin the parent xtpage */
  2710. XT_PUTPAGE(pmp);
  2711. return rc;
  2712. }
  2713. /*
  2714. * xtSearchNode()
  2715. *
  2716. * function: search for the internal xad entry covering specified extent.
  2717. * This function is mainly used by defragfs utility.
  2718. *
  2719. * parameters:
  2720. * ip - file object;
  2721. * xad - extent to find;
  2722. * cmpp - comparison result:
  2723. * btstack - traverse stack;
  2724. * flag - search process flag;
  2725. *
  2726. * returns:
  2727. * btstack contains (bn, index) of search path traversed to the entry.
  2728. * *cmpp is set to result of comparison with the entry returned.
  2729. * the page containing the entry is pinned at exit.
  2730. */
static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
			int *cmpp, struct btstack * btstack, int flag)
{
	int rc = 0;
	s64 xoff, xaddr;
	int xlen;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* page */
	int base, index, lim;
	struct btframe *btsp;
	s64 t64;

	BT_CLR(btstack);

	/* decompose the required XAD into its search keys;
	 * NOTE(review): xlen is extracted but never used for matching
	 * below — only offset and address participate in the compare.
	 */
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xaddr = addressXAD(xad);

	/*
	 * search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/* the sought entry is internal: descending into a leaf
		 * means the extent was never found as an xtpage
		 */
		if (p->header.flag & BT_LEAF) {
			XT_PUTPAGE(mp);
			return -ESTALE;
		}

		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 * (lim halves each iteration; base advances past the
		 * probe only when the probe key is smaller than K)
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 * search hit
				 *
				 * verify for exact match;
				 */
				if (xaddr == addressXAD(&p->xad[index]) &&
				    xoff == offsetXAD(&p->xad[index])) {
					*cmpp = cmp;

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* page left pinned for caller */
					return 0;
				}

				/* descend/search its child page */
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}
  2823. /*
  2824. * xtRelink()
  2825. *
  2826. * function:
  2827. * link around a freed page.
  2828. *
  2829. * Parameter:
  2830. * int tid,
  2831. * struct inode *ip,
  2832. * xtpage_t *p)
  2833. *
  2834. * returns:
  2835. */
  2836. static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
  2837. {
  2838. int rc = 0;
  2839. struct metapage *mp;
  2840. s64 nextbn, prevbn;
  2841. struct tlock *tlck;
  2842. nextbn = le64_to_cpu(p->header.next);
  2843. prevbn = le64_to_cpu(p->header.prev);
  2844. /* update prev pointer of the next page */
  2845. if (nextbn != 0) {
  2846. XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
  2847. if (rc)
  2848. return rc;
  2849. /*
  2850. * acquire a transaction lock on the page;
  2851. *
  2852. * action: update prev pointer;
  2853. */
  2854. BT_MARK_DIRTY(mp, ip);
  2855. tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
  2856. /* the page may already have been tlock'd */
  2857. p->header.prev = cpu_to_le64(prevbn);
  2858. XT_PUTPAGE(mp);
  2859. }
  2860. /* update next pointer of the previous page */
  2861. if (prevbn != 0) {
  2862. XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
  2863. if (rc)
  2864. return rc;
  2865. /*
  2866. * acquire a transaction lock on the page;
  2867. *
  2868. * action: update next pointer;
  2869. */
  2870. BT_MARK_DIRTY(mp, ip);
  2871. tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
  2872. /* the page may already have been tlock'd */
  2873. p->header.next = le64_to_cpu(nextbn);
  2874. XT_PUTPAGE(mp);
  2875. }
  2876. return 0;
  2877. }
  2878. #endif /* _STILL_TO_PORT */
  2879. /*
  2880. * xtInitRoot()
  2881. *
  2882. * initialize file root (inline in inode)
  2883. */
  2884. void xtInitRoot(tid_t tid, struct inode *ip)
  2885. {
  2886. xtpage_t *p;
  2887. /*
  2888. * acquire a transaction lock on the root
  2889. *
  2890. * action:
  2891. */
  2892. txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
  2893. tlckXTREE | tlckNEW);
  2894. p = &JFS_IP(ip)->i_xtroot;
  2895. p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
  2896. p->header.nextindex = cpu_to_le16(XTENTRYSTART);
  2897. if (S_ISDIR(ip->i_mode))
  2898. p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
  2899. else {
  2900. p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
  2901. ip->i_size = 0;
  2902. }
  2903. return;
  2904. }
  2905. /*
  2906. * We can run into a deadlock truncating a file with a large number of
  2907. * xtree pages (large fragmented file). A robust fix would entail a
  2908. * reservation system where we would reserve a number of metadata pages
  2909. * and tlocks which we would be guaranteed without a deadlock. Without
  2910. * this, a partial fix is to limit number of metadata pages we will lock
  2911. * in a single transaction. Currently we will truncate the file so that
  2912. * no more than 50 leaf pages will be locked. The caller of xtTruncate
  2913. * will be responsible for ensuring that the current transaction gets
  2914. * committed, and that subsequent transactions are created to truncate
  2915. * the file further if needed.
  2916. */
  2917. #define MAX_TRUNCATE_LEAVES 50
  2918. /*
  2919. * xtTruncate()
  2920. *
  2921. * function:
  2922. * traverse for truncation logging backward bottom up;
  2923. * terminate at the last extent entry at the current subtree
  2924. * root page covering new down size.
  2925. * truncation may occur within the last extent entry.
  2926. *
  2927. * parameter:
  2928. * int tid,
  2929. * struct inode *ip,
  2930. * s64 newsize,
  2931. * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
  2932. *
  2933. * return:
  2934. *
  2935. * note:
  2936. * PWMAP:
  2937. * 1. truncate (non-COMMIT_NOLINK file)
  2938. * by jfs_truncate() or jfs_open(O_TRUNC):
  2939. * xtree is updated;
  2940. * 2. truncate index table of directory when last entry removed
  2941. * map update via tlock at commit time;
  2942. * PMAP:
  2943. * Call xtTruncate_pmap instead
  2944. * WMAP:
  2945. * 1. remove (free zero link count) on last reference release
  2946. * (pmap has been freed at commit zero link count);
  2947. * 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
  2948. * xtree is updated;
  2949. * map update directly at truncation time;
  2950. *
  2951. * if (DELETE)
  2952. * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
  2953. * else if (TRUNCATE)
  2954. * must write LOG_NOREDOPAGE for deleted index page;
  2955. *
  2956. * pages may already have been tlocked by anonymous transactions
  2957. * during file growth (i.e., write) before truncation;
  2958. *
  2959. * except last truncated entry, deleted entries remains as is
  2960. * in the page (nextindex is updated) for other use
  2961. * (e.g., log/update allocation map): this avoid copying the page
  2962. * info but delay free of pages;
  2963. *
  2964. */
s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
{
	int rc = 0;
	s64 teof;		/* truncation end-of-file, in fs blocks */
	struct metapage *mp;
	xtpage_t *p;
	s64 bn;
	int index, nextindex;
	xad_t *xad;
	s64 xoff, xaddr;
	int xlen, len, freexlen;
	struct btstack btstack;
	struct btframe *parent;
	struct tblock *tblk = NULL;
	struct tlock *tlck = NULL;
	struct xtlock *xtlck = NULL;
	struct xdlistlock xadlock;	/* maplock for COMMIT_WMAP */
	struct pxd_lock *pxdlock;	/* maplock for COMMIT_WMAP */
	s64 nfreed;		/* total blocks freed, for quota update */
	int freed, log;
	int locked_leaves = 0;

	/* save object truncation type */
	if (tid) {
		tblk = tid_to_tblock(tid);
		tblk->xflag |= flag;
	}

	nfreed = 0;

	flag &= COMMIT_MAP;
	/* COMMIT_PMAP is handled by xtTruncate_pmap() instead */
	assert(flag != COMMIT_PMAP);

	/* log != 0: journal via tlocks (PWMAP);
	 * log == 0: update working map directly (WMAP)
	 */
	if (flag == COMMIT_PWMAP)
		log = 1;
	else {
		log = 0;
		xadlock.flag = mlckFREEXADLIST;
		xadlock.index = 1;
	}

	/*
	 * if the newsize is not an integral number of pages,
	 * the file between newsize and next page boundary will
	 * be cleared.
	 * if truncating into a file hole, it will cause
	 * a full block to be allocated for the logical block.
	 */

	/*
	 * release page blocks of truncated region <teof, eof>
	 *
	 * free the data blocks from the leaf index blocks.
	 * delete the parent index entries corresponding to
	 * the freed child data/index blocks.
	 * free the index blocks themselves which aren't needed
	 * in new sized file.
	 *
	 * index blocks are updated only if the blocks are to be
	 * retained in the new sized file.
	 * if type is PMAP, the data and index pages are NOT
	 * freed, and the data and index blocks are NOT freed
	 * from working map.
	 * (this will allow continued access of data/index of
	 * temporary file (zerolink count file truncated to zero-length)).
	 */
	teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/* clear stack */
	BT_CLR(&btstack);

	/*
	 * start with root
	 *
	 * root resides in the inode
	 */
	bn = 0;

	/*
	 * first access of each page:
	 */
      getPage:
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	/* process entries backward from last index */
	index = le16_to_cpu(p->header.nextindex) - 1;

	/* Since this is the rightmost page at this level, and we may have
	 * already freed a page that was formerly to the right, let's make
	 * sure that the next pointer is zero.
	 */
	if (p->header.next) {
		if (log)
			/*
			 * Make sure this change to the header is logged.
			 * If we really truncate this leaf, the flag
			 * will be changed to tlckTRUNCATE
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
		BT_MARK_DIRTY(mp, ip);
		p->header.next = 0;
	}

	if (p->header.flag & BT_INTERNAL)
		goto getChild;

	/*
	 * leaf page
	 */
	freed = 0;

	/* does region covered by leaf page precede Teof ? */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	if (teof >= xoff + xlen) {
		/* whole leaf survives: nothing to free here */
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/* (re)acquire tlock of the leaf page */
	if (log) {
		if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
			/*
			 * We need to limit the size of the transaction
			 * to avoid exhausting pagecache & tlocks.
			 * Stop here and report how far we got; caller
			 * commits and truncates the remainder later.
			 */
			XT_PUTPAGE(mp);
			newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
			goto getParent;
		}
		tlck = txLock(tid, ip, mp, tlckXTREE);
		tlck->type = tlckXTREE | tlckTRUNCATE;
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
	}
	BT_MARK_DIRTY(mp, ip);

	/*
	 * scan backward leaf page entries
	 */
	for (; index >= XTENTRYSTART; index--) {
		xad = &p->xad[index];
		xoff = offsetXAD(xad);
		xlen = lengthXAD(xad);
		xaddr = addressXAD(xad);

		/*
		 * The "data" for a directory is indexed by the block
		 * device's address space. This metadata must be invalidated
		 * here
		 */
		if (S_ISDIR(ip->i_mode) && (teof == 0))
			invalidate_xad_metapages(ip, *xad);
		/*
		 * entry beyond eof: continue scan of current page
		 *          xad
		 * ---|---=======------->
		 *   eof
		 */
		if (teof < xoff) {
			nfreed += xlen;
			continue;
		}

		/*
		 * (xoff <= teof): last entry to be deleted from page;
		 * If other entries remain in page: keep and update the page.
		 */

		/*
		 * eof == entry_start: delete the entry
		 *           xad
		 * -------|=======------->
		 *       eof
		 *
		 */
		if (teof == xoff) {
			nfreed += xlen;

			/* last surviving entry was deleted: the leaf
			 * itself becomes empty (freed = 1 below)
			 */
			if (index == XTENTRYSTART)
				break;

			nextindex = index;
		}
		/*
		 * eof within the entry: truncate the entry.
		 *          xad
		 * -------===|===------->
		 *          eof
		 */
		else if (teof < xoff + xlen) {
			/* update truncated entry */
			len = teof - xoff;
			freexlen = xlen - len;
			XADlength(xad, len);

			/* save pxd of truncated extent in tlck */
			xaddr += len;
			if (log) {	/* COMMIT_PWMAP */
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(index, (int)xtlck->lwm.offset) : index;
				xtlck->lwm.length = index + 1 -
				    xtlck->lwm.offset;
				xtlck->twm.offset = index;
				pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
			}
			/* free truncated extent */
			else {	/* COMMIT_WMAP */

				pxdlock = (struct pxd_lock *) & xadlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
				txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);

				/* reset map lock */
				xadlock.flag = mlckFREEXADLIST;
			}

			/* current entry is new last entry; */
			nextindex = index + 1;

			nfreed += freexlen;
		}
		/*
		 * eof beyond the entry:
		 *          xad
		 * -------=======---|--->
		 *                 eof
		 */
		else {		/* (xoff + xlen < teof) */

			nextindex = index + 1;
		}

		/* free the entries past the new last entry; under WMAP
		 * their extents are freed now, under PWMAP at commit
		 */
		if (nextindex < le16_to_cpu(p->header.nextindex)) {
			if (!log) {	/* COMMIT_WMAP */
				xadlock.xdlist = &p->xad[nextindex];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    nextindex;
				txFreeMap(ip, (struct maplock *) & xadlock,
					  NULL, COMMIT_WMAP);
			}
			p->header.nextindex = cpu_to_le16(nextindex);
		}

		XT_PUTPAGE(mp);

		/* assert(freed == 0); */
		goto getParent;
	}			/* end scan of leaf page entries */

	freed = 1;

	/*
	 * leaf page become empty: free the page if type != PMAP
	 */
	if (log) {		/* COMMIT_PWMAP */
		/* txCommit() with tlckFREE:
		 * free data extents covered by leaf [XTENTRYSTART:hwm);
		 * invalidate leaf if COMMIT_PWMAP;
		 * if (TRUNCATE), will write LOG_NOREDOPAGE;
		 */
		tlck->type = tlckXTREE | tlckFREE;
	} else {		/* COMMIT_WMAP */

		/* free data extents covered by leaf */
		xadlock.xdlist = &p->xad[XTENTRYSTART];
		xadlock.count =
		    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
		txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
	}

	if (p->header.flag & BT_ROOT) {
		/* empty root: reset it to an empty leaf and stop */
		p->header.flag &= ~BT_INTERNAL;
		p->header.flag |= BT_LEAF;
		p->header.nextindex = cpu_to_le16(XTENTRYSTART);

		XT_PUTPAGE(mp);	/* debug */
		goto out;
	} else {
		if (log) {	/* COMMIT_PWMAP */
			/* page will be invalidated at tx completion
			 */
			XT_PUTPAGE(mp);
		} else {	/* COMMIT_WMAP */

			if (mp->lid)
				lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;

			/* invalidate empty leaf page */
			discard_metapage(mp);
		}
	}

	/*
	 * the leaf page become empty: delete the parent entry
	 * for the leaf page if the parent page is to be kept
	 * in the new sized file.
	 */

	/*
	 * go back up to the parent page
	 */
      getParent:
	/* pop/restore parent entry for the current child page */
	if ((parent = BT_POP(&btstack)) == NULL)
		/* current page must have been root */
		goto out;

	/* get back the parent page */
	bn = parent->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	index = parent->index;

	/*
	 * child page was not empty:
	 */
	if (freed == 0) {
		/* has any entry deleted from parent ? */
		if (index < le16_to_cpu(p->header.nextindex) - 1) {
			/* (re)acquire tlock on the parent page */
			if (log) {	/* COMMIT_PWMAP */
				/* txCommit() with tlckTRUNCATE:
				 * free child extents covered by parent [);
				 */
				tlck = txLock(tid, ip, mp, tlckXTREE);
				xtlck = (struct xtlock *) & tlck->lock;
				if (!(tlck->type & tlckTRUNCATE)) {
					xtlck->hwm.offset =
					    le16_to_cpu(p->header.
							nextindex) - 1;
					tlck->type =
					    tlckXTREE | tlckTRUNCATE;
				}
			} else {	/* COMMIT_WMAP */

				/* free child extents covered by parent */
				xadlock.xdlist = &p->xad[index + 1];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    index - 1;
				txFreeMap(ip, (struct maplock *) & xadlock,
					  NULL, COMMIT_WMAP);
			}
			BT_MARK_DIRTY(mp, ip);

			p->header.nextindex = cpu_to_le16(index + 1);
		}
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/*
	 * child page was empty:
	 */
	nfreed += lengthXAD(&p->xad[index]);

	/*
	 * During working map update, child page's tlock must be handled
	 * before parent's. This is because the parent's tlock will cause
	 * the child's disk space to be marked available in the wmap, so
	 * it's important that the child page be released by that time.
	 *
	 * ToDo: tlocks should be on doubly-linked list, so we can
	 * quickly remove it and add it to the end.
	 */

	/*
	 * Move parent page's tlock to the end of the tid's tlock list
	 */
	if (log && mp->lid && (tblk->last != mp->lid) &&
	    lid_to_tlock(mp->lid)->tid) {
		lid_t lid = mp->lid;
		struct tlock *prev;

		tlck = lid_to_tlock(lid);

		if (tblk->next == lid)
			tblk->next = tlck->next;
		else {
			/* walk singly-linked list to unlink lid */
			for (prev = lid_to_tlock(tblk->next);
			     prev->next != lid;
			     prev = lid_to_tlock(prev->next)) {
				assert(prev->next);
			}
			prev->next = tlck->next;
		}
		lid_to_tlock(tblk->last)->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}

	/*
	 * parent page become empty: free the page
	 */
	if (index == XTENTRYSTART) {
		if (log) {	/* COMMIT_PWMAP */
			/* txCommit() with tlckFREE:
			 * free child extents covered by parent;
			 * invalidate parent if COMMIT_PWMAP;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->hwm.offset =
			    le16_to_cpu(p->header.nextindex) - 1;
			tlck->type = tlckXTREE | tlckFREE;
		} else {	/* COMMIT_WMAP */

			/* free child extents covered by parent */
			xadlock.xdlist = &p->xad[XTENTRYSTART];
			xadlock.count =
			    le16_to_cpu(p->header.nextindex) -
			    XTENTRYSTART;
			txFreeMap(ip, (struct maplock *) & xadlock, NULL,
				  COMMIT_WMAP);
		}
		BT_MARK_DIRTY(mp, ip);

		if (p->header.flag & BT_ROOT) {
			p->header.flag &= ~BT_INTERNAL;
			p->header.flag |= BT_LEAF;
			p->header.nextindex = cpu_to_le16(XTENTRYSTART);
			if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
				/*
				 * Shrink root down to allow inline
				 * EA (otherwise fsck complains)
				 */
				p->header.maxentry =
				    cpu_to_le16(XTROOTINITSLOT);
				JFS_IP(ip)->mode2 |= INLINEEA;
			}

			XT_PUTPAGE(mp);	/* debug */
			goto out;
		} else {
			if (log) {	/* COMMIT_PWMAP */
				/* page will be invalidated at tx completion
				 */
				XT_PUTPAGE(mp);
			} else {	/* COMMIT_WMAP */

				if (mp->lid)
					lid_to_tlock(mp->lid)->flag |=
						tlckFREELOCK;

				/* invalidate parent page */
				discard_metapage(mp);
			}

			/* parent has become empty and freed:
			 * go back up to its parent page
			 */
			/* freed = 1; */
			goto getParent;
		}
	}
	/*
	 * parent page still has entries for front region;
	 */
	else {
		/* try truncate region covered by preceding entry
		 * (process backward)
		 */
		index--;

		/* go back down to the child page corresponding
		 * to the entry
		 */
		goto getChild;
	}

	/*
	 * internal page: go down to child page of current entry
	 */
      getChild:
	/* save current parent entry for the child page */
	if (BT_STACK_FULL(&btstack)) {
		jfs_error(ip->i_sb, "stack overrun in xtTruncate!");
		XT_PUTPAGE(mp);
		return -EIO;
	}
	BT_PUSH(&btstack, bn, index);

	/* get child page */
	xad = &p->xad[index];
	bn = addressXAD(xad);

	/*
	 * first access of each internal entry:
	 */
	/* release parent page */
	XT_PUTPAGE(mp);

	/* process the child page */
	goto getPage;

      out:
	/*
	 * update file resource stat
	 */
	/* set size
	 */
	if (S_ISDIR(ip->i_mode) && !newsize)
		ip->i_size = 1;	/* fsck hates zero-length directories */
	else
		ip->i_size = newsize;

	/* update quota allocation to reflect freed blocks */
	DQUOT_FREE_BLOCK(ip, nfreed);

	/*
	 * free tlock of invalidated pages
	 */
	if (flag == COMMIT_WMAP)
		txFreelock(ip);

	/* may be less than requested newsize if MAX_TRUNCATE_LEAVES hit */
	return newsize;
}
  3430. /*
  3431. * xtTruncate_pmap()
  3432. *
  3433. * function:
 * Perform truncate to zero length for deleted file, leaving the
 * xtree and working map untouched. This allows the file to
  3436. * be accessed via open file handles, while the delete of the file
  3437. * is committed to disk.
  3438. *
  3439. * parameter:
  3440. * tid_t tid,
  3441. * struct inode *ip,
  3442. * s64 committed_size)
  3443. *
  3444. * return: new committed size
  3445. *
  3446. * note:
  3447. *
  3448. * To avoid deadlock by holding too many transaction locks, the
  3449. * truncation may be broken up into multiple transactions.
 * The committed_size keeps track of how much of the file has been
 * freed from the pmaps.
  3452. */
  3453. s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
  3454. {
  3455. s64 bn;
  3456. struct btstack btstack;
  3457. int cmp;
  3458. int index;
  3459. int locked_leaves = 0;
  3460. struct metapage *mp;
  3461. xtpage_t *p;
  3462. struct btframe *parent;
  3463. int rc;
  3464. struct tblock *tblk;
  3465. struct tlock *tlck = NULL;
  3466. xad_t *xad;
  3467. int xlen;
  3468. s64 xoff;
  3469. struct xtlock *xtlck = NULL;
  3470. /* save object truncation type */
  3471. tblk = tid_to_tblock(tid);
  3472. tblk->xflag |= COMMIT_PMAP;
  3473. /* clear stack */
  3474. BT_CLR(&btstack);
  3475. if (committed_size) {
  3476. xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
  3477. rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
  3478. if (rc)
  3479. return rc;
  3480. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  3481. if (cmp != 0) {
  3482. XT_PUTPAGE(mp);
  3483. jfs_error(ip->i_sb,
  3484. "xtTruncate_pmap: did not find extent");
  3485. return -EIO;
  3486. }
  3487. } else {
  3488. /*
  3489. * start with root
  3490. *
  3491. * root resides in the inode
  3492. */
  3493. bn = 0;
  3494. /*
  3495. * first access of each page:
  3496. */
  3497. getPage:
  3498. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  3499. if (rc)
  3500. return rc;
  3501. /* process entries backward from last index */
  3502. index = le16_to_cpu(p->header.nextindex) - 1;
  3503. if (p->header.flag & BT_INTERNAL)
  3504. goto getChild;
  3505. }
  3506. /*
  3507. * leaf page
  3508. */
  3509. if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
  3510. /*
  3511. * We need to limit the size of the transaction
  3512. * to avoid exhausting pagecache & tlocks
  3513. */
  3514. xad = &p->xad[index];
  3515. xoff = offsetXAD(xad);
  3516. xlen = lengthXAD(xad);
  3517. XT_PUTPAGE(mp);
  3518. return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
  3519. }
  3520. tlck = txLock(tid, ip, mp, tlckXTREE);
  3521. tlck->type = tlckXTREE | tlckFREE;
  3522. xtlck = (struct xtlock *) & tlck->lock;
  3523. xtlck->hwm.offset = index;
  3524. XT_PUTPAGE(mp);
  3525. /*
  3526. * go back up to the parent page
  3527. */
  3528. getParent:
  3529. /* pop/restore parent entry for the current child page */
  3530. if ((parent = BT_POP(&btstack)) == NULL)
  3531. /* current page must have been root */
  3532. goto out;
  3533. /* get back the parent page */
  3534. bn = parent->bn;
  3535. XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  3536. if (rc)
  3537. return rc;
  3538. index = parent->index;
  3539. /*
  3540. * parent page become empty: free the page
  3541. */
  3542. if (index == XTENTRYSTART) {
  3543. /* txCommit() with tlckFREE:
  3544. * free child extents covered by parent;
  3545. * invalidate parent if COMMIT_PWMAP;
  3546. */
  3547. tlck = txLock(tid, ip, mp, tlckXTREE);
  3548. xtlck = (struct xtlock *) & tlck->lock;
  3549. xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
  3550. tlck->type = tlckXTREE | tlckFREE;
  3551. XT_PUTPAGE(mp);
  3552. if (p->header.flag & BT_ROOT) {
  3553. goto out;
  3554. } else {
  3555. goto getParent;
  3556. }
  3557. }
  3558. /*
  3559. * parent page still has entries for front region;
  3560. */
  3561. else
  3562. index--;
  3563. /*
  3564. * internal page: go down to child page of current entry
  3565. */
  3566. getChild:
  3567. /* save current parent entry for the child page */
  3568. if (BT_STACK_FULL(&btstack)) {
  3569. jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!");
  3570. XT_PUTPAGE(mp);
  3571. return -EIO;
  3572. }
  3573. BT_PUSH(&btstack, bn, index);
  3574. /* get child page */
  3575. xad = &p->xad[index];
  3576. bn = addressXAD(xad);
  3577. /*
  3578. * first access of each internal entry:
  3579. */
  3580. /* release parent page */
  3581. XT_PUTPAGE(mp);
  3582. /* process the child page */
  3583. goto getPage;
  3584. out:
  3585. return 0;
  3586. }
  3587. #ifdef CONFIG_JFS_STATISTICS
  3588. static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
  3589. {
  3590. seq_printf(m,
  3591. "JFS Xtree statistics\n"
  3592. "====================\n"
  3593. "searches = %d\n"
  3594. "fast searches = %d\n"
  3595. "splits = %d\n",
  3596. xtStat.search,
  3597. xtStat.fastSearch,
  3598. xtStat.split);
  3599. return 0;
  3600. }
  3601. static int jfs_xtstat_proc_open(struct inode *inode, struct file *file)
  3602. {
  3603. return single_open(file, jfs_xtstat_proc_show, NULL);
  3604. }
  3605. const struct file_operations jfs_xtstat_proc_fops = {
  3606. .owner = THIS_MODULE,
  3607. .open = jfs_xtstat_proc_open,
  3608. .read = seq_read,
  3609. .llseek = seq_lseek,
  3610. .release = single_release,
  3611. };
  3612. #endif