super.c 82 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003
  1. /*
  2. * linux/fs/ext3/super.c
  3. *
  4. * Copyright (C) 1992, 1993, 1994, 1995
  5. * Remy Card (card@masi.ibp.fr)
  6. * Laboratoire MASI - Institut Blaise Pascal
  7. * Universite Pierre et Marie Curie (Paris VI)
  8. *
  9. * from
  10. *
  11. * linux/fs/minix/inode.c
  12. *
  13. * Copyright (C) 1991, 1992 Linus Torvalds
  14. *
  15. * Big-endian to little-endian byte-swapping/bitmaps by
  16. * David S. Miller (davem@caip.rutgers.edu), 1995
  17. */
  18. #include <linux/module.h>
  19. #include <linux/string.h>
  20. #include <linux/fs.h>
  21. #include <linux/time.h>
  22. #include <linux/jbd.h>
  23. #include <linux/ext3_fs.h>
  24. #include <linux/ext3_jbd.h>
  25. #include <linux/slab.h>
  26. #include <linux/init.h>
  27. #include <linux/blkdev.h>
  28. #include <linux/parser.h>
  29. #include <linux/smp_lock.h>
  30. #include <linux/buffer_head.h>
  31. #include <linux/exportfs.h>
  32. #include <linux/vfs.h>
  33. #include <linux/random.h>
  34. #include <linux/mount.h>
  35. #include <linux/namei.h>
  36. #include <linux/quotaops.h>
  37. #include <linux/seq_file.h>
  38. #include <linux/log2.h>
  39. #include <asm/uaccess.h>
  40. #include "xattr.h"
  41. #include "acl.h"
  42. #include "namei.h"
  /*
   * Forward declarations of file-local (static) functions that are used
   * before their definitions appear.
   */
  static int ext3_load_journal(struct super_block *sb,
                               struct ext3_super_block *es,
                               unsigned long journal_devnum);
  static int ext3_create_journal(struct super_block *sb,
                                 struct ext3_super_block *es,
                                 unsigned int journal_inum);
  static void ext3_commit_super(struct super_block *sb,
                                struct ext3_super_block *es,
                                int sync);
  static void ext3_mark_recovery_complete(struct super_block *sb,
                                          struct ext3_super_block *es);
  static void ext3_clear_journal_err(struct super_block *sb,
                                     struct ext3_super_block *es);
  static int ext3_sync_fs(struct super_block *sb, int wait);
  static const char *ext3_decode_error(struct super_block *sb, int errno,
                                       char nbuf[16]);
  static int ext3_remount(struct super_block *sb, int *flags, char *data);
  static int ext3_statfs(struct dentry *dentry, struct kstatfs *buf);
  static void ext3_unlockfs(struct super_block *sb);
  static void ext3_write_super(struct super_block *sb);
  static void ext3_write_super_lockfs(struct super_block *sb);
  62. /*
  63. * Wrappers for journal_start/end.
  64. *
  65. * The only special thing we need to do here is to make sure that all
  66. * journal_end calls result in the superblock being marked dirty, so
  67. * that sync() will call the filesystem's write_super callback if
  68. * appropriate.
  69. */
  70. handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
  71. {
  72. journal_t *journal;
  73. if (sb->s_flags & MS_RDONLY)
  74. return ERR_PTR(-EROFS);
  75. /* Special case here: if the journal has aborted behind our
  76. * backs (eg. EIO in the commit thread), then we still need to
  77. * take the FS itself readonly cleanly. */
  78. journal = EXT3_SB(sb)->s_journal;
  79. if (is_journal_aborted(journal)) {
  80. ext3_abort(sb, __func__,
  81. "Detected aborted journal");
  82. return ERR_PTR(-EROFS);
  83. }
  84. return journal_start(journal, nblocks);
  85. }
  86. /*
  87. * The only special thing we need to do here is to make sure that all
  88. * journal_stop calls result in the superblock being marked dirty, so
  89. * that sync() will call the filesystem's write_super callback if
  90. * appropriate.
  91. */
  92. int __ext3_journal_stop(const char *where, handle_t *handle)
  93. {
  94. struct super_block *sb;
  95. int err;
  96. int rc;
  97. sb = handle->h_transaction->t_journal->j_private;
  98. err = handle->h_err;
  99. rc = journal_stop(handle);
  100. if (!err)
  101. err = rc;
  102. if (err)
  103. __ext3_std_error(sb, where, err);
  104. return err;
  105. }
  106. void ext3_journal_abort_handle(const char *caller, const char *err_fn,
  107. struct buffer_head *bh, handle_t *handle, int err)
  108. {
  109. char nbuf[16];
  110. const char *errstr = ext3_decode_error(NULL, err, nbuf);
  111. if (bh)
  112. BUFFER_TRACE(bh, "abort");
  113. if (!handle->h_err)
  114. handle->h_err = err;
  115. if (is_handle_aborted(handle))
  116. return;
  117. printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
  118. caller, errstr, err_fn);
  119. journal_abort_handle(handle);
  120. }
  121. /* Deal with the reporting of failure conditions on a filesystem such as
  122. * inconsistencies detected or read IO failures.
  123. *
  124. * On ext2, we can store the error state of the filesystem in the
  125. * superblock. That is not possible on ext3, because we may have other
  126. * write ordering constraints on the superblock which prevent us from
  127. * writing it out straight away; and given that the journal is about to
  128. * be aborted, we can't rely on the current, or future, transactions to
  129. * write out the superblock safely.
  130. *
  131. * We'll just use the journal_abort() error code to record an error in
  132. * the journal instead. On recovery, the journal will compain about
  133. * that error until we've noted it down and cleared it.
  134. */
  135. static void ext3_handle_error(struct super_block *sb)
  136. {
  137. struct ext3_super_block *es = EXT3_SB(sb)->s_es;
  138. EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
  139. es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
  140. if (sb->s_flags & MS_RDONLY)
  141. return;
  142. if (!test_opt (sb, ERRORS_CONT)) {
  143. journal_t *journal = EXT3_SB(sb)->s_journal;
  144. EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
  145. if (journal)
  146. journal_abort(journal, -EIO);
  147. }
  148. if (test_opt (sb, ERRORS_RO)) {
  149. printk (KERN_CRIT "Remounting filesystem read-only\n");
  150. sb->s_flags |= MS_RDONLY;
  151. }
  152. ext3_commit_super(sb, es, 1);
  153. if (test_opt(sb, ERRORS_PANIC))
  154. panic("EXT3-fs (device %s): panic forced after error\n",
  155. sb->s_id);
  156. }
  157. void ext3_error (struct super_block * sb, const char * function,
  158. const char * fmt, ...)
  159. {
  160. va_list args;
  161. va_start(args, fmt);
  162. printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
  163. vprintk(fmt, args);
  164. printk("\n");
  165. va_end(args);
  166. ext3_handle_error(sb);
  167. }
  168. static const char *ext3_decode_error(struct super_block * sb, int errno,
  169. char nbuf[16])
  170. {
  171. char *errstr = NULL;
  172. switch (errno) {
  173. case -EIO:
  174. errstr = "IO failure";
  175. break;
  176. case -ENOMEM:
  177. errstr = "Out of memory";
  178. break;
  179. case -EROFS:
  180. if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
  181. errstr = "Journal has aborted";
  182. else
  183. errstr = "Readonly filesystem";
  184. break;
  185. default:
  186. /* If the caller passed in an extra buffer for unknown
  187. * errors, textualise them now. Else we just return
  188. * NULL. */
  189. if (nbuf) {
  190. /* Check for truncated error codes... */
  191. if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
  192. errstr = nbuf;
  193. }
  194. break;
  195. }
  196. return errstr;
  197. }
  198. /* __ext3_std_error decodes expected errors from journaling functions
  199. * automatically and invokes the appropriate error response. */
  200. void __ext3_std_error (struct super_block * sb, const char * function,
  201. int errno)
  202. {
  203. char nbuf[16];
  204. const char *errstr;
  205. /* Special case: if the error is EROFS, and we're not already
  206. * inside a transaction, then there's really no point in logging
  207. * an error. */
  208. if (errno == -EROFS && journal_current_handle() == NULL &&
  209. (sb->s_flags & MS_RDONLY))
  210. return;
  211. errstr = ext3_decode_error(sb, errno, nbuf);
  212. printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n",
  213. sb->s_id, function, errstr);
  214. ext3_handle_error(sb);
  215. }
  216. /*
  217. * ext3_abort is a much stronger failure handler than ext3_error. The
  218. * abort function may be used to deal with unrecoverable failures such
  219. * as journal IO errors or ENOMEM at a critical moment in log management.
  220. *
  221. * We unconditionally force the filesystem into an ABORT|READONLY state,
  222. * unless the error response on the fs has been set to panic in which
  223. * case we take the easy way out and panic immediately.
  224. */
  225. void ext3_abort (struct super_block * sb, const char * function,
  226. const char * fmt, ...)
  227. {
  228. va_list args;
  229. printk (KERN_CRIT "ext3_abort called.\n");
  230. va_start(args, fmt);
  231. printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
  232. vprintk(fmt, args);
  233. printk("\n");
  234. va_end(args);
  235. if (test_opt(sb, ERRORS_PANIC))
  236. panic("EXT3-fs panic from previous error\n");
  237. if (sb->s_flags & MS_RDONLY)
  238. return;
  239. printk(KERN_CRIT "Remounting filesystem read-only\n");
  240. EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
  241. sb->s_flags |= MS_RDONLY;
  242. EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
  243. if (EXT3_SB(sb)->s_journal)
  244. journal_abort(EXT3_SB(sb)->s_journal, -EIO);
  245. }
  246. void ext3_warning (struct super_block * sb, const char * function,
  247. const char * fmt, ...)
  248. {
  249. va_list args;
  250. va_start(args, fmt);
  251. printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ",
  252. sb->s_id, function);
  253. vprintk(fmt, args);
  254. printk("\n");
  255. va_end(args);
  256. }
  257. void ext3_update_dynamic_rev(struct super_block *sb)
  258. {
  259. struct ext3_super_block *es = EXT3_SB(sb)->s_es;
  260. if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
  261. return;
  262. ext3_warning(sb, __func__,
  263. "updating to rev %d because of new feature flag, "
  264. "running e2fsck is recommended",
  265. EXT3_DYNAMIC_REV);
  266. es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
  267. es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
  268. es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
  269. /* leave es->s_feature_*compat flags alone */
  270. /* es->s_uuid will be set by e2fsck if empty */
  271. /*
  272. * The rest of the superblock fields should be zero, and if not it
  273. * means they are likely already in use, so leave them alone. We
  274. * can leave it up to e2fsck to clean up any inconsistencies there.
  275. */
  276. }
  277. /*
  278. * Open the external journal device
  279. */
  280. static struct block_device *ext3_blkdev_get(dev_t dev)
  281. {
  282. struct block_device *bdev;
  283. char b[BDEVNAME_SIZE];
  284. bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
  285. if (IS_ERR(bdev))
  286. goto fail;
  287. return bdev;
  288. fail:
  289. printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n",
  290. __bdevname(dev, b), PTR_ERR(bdev));
  291. return NULL;
  292. }
  293. /*
  294. * Release the journal device
  295. */
  296. static int ext3_blkdev_put(struct block_device *bdev)
  297. {
  298. bd_release(bdev);
  299. return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
  300. }
  301. static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
  302. {
  303. struct block_device *bdev;
  304. int ret = -ENODEV;
  305. bdev = sbi->journal_bdev;
  306. if (bdev) {
  307. ret = ext3_blkdev_put(bdev);
  308. sbi->journal_bdev = NULL;
  309. }
  310. return ret;
  311. }
  312. static inline struct inode *orphan_list_entry(struct list_head *l)
  313. {
  314. return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
  315. }
  316. static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
  317. {
  318. struct list_head *l;
  319. printk(KERN_ERR "sb orphan head is %d\n",
  320. le32_to_cpu(sbi->s_es->s_last_orphan));
  321. printk(KERN_ERR "sb_info orphan list:\n");
  322. list_for_each(l, &sbi->s_orphan) {
  323. struct inode *inode = orphan_list_entry(l);
  324. printk(KERN_ERR " "
  325. "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
  326. inode->i_sb->s_id, inode->i_ino, inode,
  327. inode->i_mode, inode->i_nlink,
  328. NEXT_ORPHAN(inode));
  329. }
  330. }
  331. static void ext3_put_super (struct super_block * sb)
  332. {
  333. struct ext3_sb_info *sbi = EXT3_SB(sb);
  334. struct ext3_super_block *es = sbi->s_es;
  335. int i, err;
  336. ext3_xattr_put_super(sb);
  337. err = journal_destroy(sbi->s_journal);
  338. sbi->s_journal = NULL;
  339. if (err < 0)
  340. ext3_abort(sb, __func__, "Couldn't clean up the journal");
  341. if (!(sb->s_flags & MS_RDONLY)) {
  342. EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
  343. es->s_state = cpu_to_le16(sbi->s_mount_state);
  344. BUFFER_TRACE(sbi->s_sbh, "marking dirty");
  345. mark_buffer_dirty(sbi->s_sbh);
  346. ext3_commit_super(sb, es, 1);
  347. }
  348. for (i = 0; i < sbi->s_gdb_count; i++)
  349. brelse(sbi->s_group_desc[i]);
  350. kfree(sbi->s_group_desc);
  351. percpu_counter_destroy(&sbi->s_freeblocks_counter);
  352. percpu_counter_destroy(&sbi->s_freeinodes_counter);
  353. percpu_counter_destroy(&sbi->s_dirs_counter);
  354. brelse(sbi->s_sbh);
  355. #ifdef CONFIG_QUOTA
  356. for (i = 0; i < MAXQUOTAS; i++)
  357. kfree(sbi->s_qf_names[i]);
  358. #endif
  359. /* Debugging code just in case the in-memory inode orphan list
  360. * isn't empty. The on-disk one can be non-empty if we've
  361. * detected an error and taken the fs readonly, but the
  362. * in-memory list had better be clean by this point. */
  363. if (!list_empty(&sbi->s_orphan))
  364. dump_orphan_list(sb, sbi);
  365. J_ASSERT(list_empty(&sbi->s_orphan));
  366. invalidate_bdev(sb->s_bdev);
  367. if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
  368. /*
  369. * Invalidate the journal device's buffers. We don't want them
  370. * floating about in memory - the physical journal device may
  371. * hotswapped, and it breaks the `ro-after' testing code.
  372. */
  373. sync_blockdev(sbi->journal_bdev);
  374. invalidate_bdev(sbi->journal_bdev);
  375. ext3_blkdev_remove(sbi);
  376. }
  377. sb->s_fs_info = NULL;
  378. kfree(sbi);
  379. return;
  380. }
  381. static struct kmem_cache *ext3_inode_cachep;
  382. /*
  383. * Called inside transaction, so use GFP_NOFS
  384. */
  385. static struct inode *ext3_alloc_inode(struct super_block *sb)
  386. {
  387. struct ext3_inode_info *ei;
  388. ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
  389. if (!ei)
  390. return NULL;
  391. #ifdef CONFIG_EXT3_FS_POSIX_ACL
  392. ei->i_acl = EXT3_ACL_NOT_CACHED;
  393. ei->i_default_acl = EXT3_ACL_NOT_CACHED;
  394. #endif
  395. ei->i_block_alloc_info = NULL;
  396. ei->vfs_inode.i_version = 1;
  397. return &ei->vfs_inode;
  398. }
  399. static void ext3_destroy_inode(struct inode *inode)
  400. {
  401. if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
  402. printk("EXT3 Inode %p: orphan list check failed!\n",
  403. EXT3_I(inode));
  404. print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
  405. EXT3_I(inode), sizeof(struct ext3_inode_info),
  406. false);
  407. dump_stack();
  408. }
  409. kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
  410. }
  411. static void init_once(void *foo)
  412. {
  413. struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
  414. INIT_LIST_HEAD(&ei->i_orphan);
  415. #ifdef CONFIG_EXT3_FS_XATTR
  416. init_rwsem(&ei->xattr_sem);
  417. #endif
  418. mutex_init(&ei->truncate_mutex);
  419. inode_init_once(&ei->vfs_inode);
  420. }
  421. static int init_inodecache(void)
  422. {
  423. ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
  424. sizeof(struct ext3_inode_info),
  425. 0, (SLAB_RECLAIM_ACCOUNT|
  426. SLAB_MEM_SPREAD),
  427. init_once);
  428. if (ext3_inode_cachep == NULL)
  429. return -ENOMEM;
  430. return 0;
  431. }
  432. static void destroy_inodecache(void)
  433. {
  434. kmem_cache_destroy(ext3_inode_cachep);
  435. }
  436. static void ext3_clear_inode(struct inode *inode)
  437. {
  438. struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
  439. #ifdef CONFIG_EXT3_FS_POSIX_ACL
  440. if (EXT3_I(inode)->i_acl &&
  441. EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
  442. posix_acl_release(EXT3_I(inode)->i_acl);
  443. EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
  444. }
  445. if (EXT3_I(inode)->i_default_acl &&
  446. EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
  447. posix_acl_release(EXT3_I(inode)->i_default_acl);
  448. EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
  449. }
  450. #endif
  451. ext3_discard_reservation(inode);
  452. EXT3_I(inode)->i_block_alloc_info = NULL;
  453. if (unlikely(rsv))
  454. kfree(rsv);
  455. }
  /*
   * Append the quota-related mount options of @sb to @seq.  Emits nothing
   * when the kernel is built without CONFIG_QUOTA.
   */
  static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
  {
  #if defined(CONFIG_QUOTA)
      struct ext3_sb_info *sbi = EXT3_SB(sb);

      if (sbi->s_jquota_fmt) {
          const char *fmt_name;

          if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
              fmt_name = "vfsold";
          else
              fmt_name = "vfsv0";
          seq_printf(seq, ",jqfmt=%s", fmt_name);
      }

      if (sbi->s_qf_names[USRQUOTA])
          seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);

      if (sbi->s_qf_names[GRPQUOTA])
          seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);

      if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA)
          seq_puts(seq, ",usrquota");

      if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)
          seq_puts(seq, ",grpquota");
  #endif
  }
  473. /*
  474. * Show an option if
  475. * - it's set to a non-default value OR
  476. * - if the per-sb default is different from the global default
  477. */
  478. static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
  479. {
  480. struct super_block *sb = vfs->mnt_sb;
  481. struct ext3_sb_info *sbi = EXT3_SB(sb);
  482. struct ext3_super_block *es = sbi->s_es;
  483. unsigned long def_mount_opts;
  484. def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
  485. if (sbi->s_sb_block != 1)
  486. seq_printf(seq, ",sb=%lu", sbi->s_sb_block);
  487. if (test_opt(sb, MINIX_DF))
  488. seq_puts(seq, ",minixdf");
  489. if (test_opt(sb, GRPID))
  490. seq_puts(seq, ",grpid");
  491. if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS))
  492. seq_puts(seq, ",nogrpid");
  493. if (sbi->s_resuid != EXT3_DEF_RESUID ||
  494. le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) {
  495. seq_printf(seq, ",resuid=%u", sbi->s_resuid);
  496. }
  497. if (sbi->s_resgid != EXT3_DEF_RESGID ||
  498. le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) {
  499. seq_printf(seq, ",resgid=%u", sbi->s_resgid);
  500. }
  501. if (test_opt(sb, ERRORS_RO)) {
  502. int def_errors = le16_to_cpu(es->s_errors);
  503. if (def_errors == EXT3_ERRORS_PANIC ||
  504. def_errors == EXT3_ERRORS_CONTINUE) {
  505. seq_puts(seq, ",errors=remount-ro");
  506. }
  507. }
  508. if (test_opt(sb, ERRORS_CONT))
  509. seq_puts(seq, ",errors=continue");
  510. if (test_opt(sb, ERRORS_PANIC))
  511. seq_puts(seq, ",errors=panic");
  512. if (test_opt(sb, NO_UID32))
  513. seq_puts(seq, ",nouid32");
  514. if (test_opt(sb, DEBUG))
  515. seq_puts(seq, ",debug");
  516. if (test_opt(sb, OLDALLOC))
  517. seq_puts(seq, ",oldalloc");
  518. #ifdef CONFIG_EXT3_FS_XATTR
  519. if (test_opt(sb, XATTR_USER))
  520. seq_puts(seq, ",user_xattr");
  521. if (!test_opt(sb, XATTR_USER) &&
  522. (def_mount_opts & EXT3_DEFM_XATTR_USER)) {
  523. seq_puts(seq, ",nouser_xattr");
  524. }
  525. #endif
  526. #ifdef CONFIG_EXT3_FS_POSIX_ACL
  527. if (test_opt(sb, POSIX_ACL))
  528. seq_puts(seq, ",acl");
  529. if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL))
  530. seq_puts(seq, ",noacl");
  531. #endif
  532. if (!test_opt(sb, RESERVATION))
  533. seq_puts(seq, ",noreservation");
  534. if (sbi->s_commit_interval) {
  535. seq_printf(seq, ",commit=%u",
  536. (unsigned) (sbi->s_commit_interval / HZ));
  537. }
  538. if (test_opt(sb, BARRIER))
  539. seq_puts(seq, ",barrier=1");
  540. if (test_opt(sb, NOBH))
  541. seq_puts(seq, ",nobh");
  542. if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
  543. seq_puts(seq, ",data=journal");
  544. else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
  545. seq_puts(seq, ",data=ordered");
  546. else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
  547. seq_puts(seq, ",data=writeback");
  548. if (test_opt(sb, DATA_ERR_ABORT))
  549. seq_puts(seq, ",data_err=abort");
  550. ext3_show_quota_options(seq, sb);
  551. return 0;
  552. }
  553. static struct inode *ext3_nfs_get_inode(struct super_block *sb,
  554. u64 ino, u32 generation)
  555. {
  556. struct inode *inode;
  557. if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
  558. return ERR_PTR(-ESTALE);
  559. if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
  560. return ERR_PTR(-ESTALE);
  561. /* iget isn't really right if the inode is currently unallocated!!
  562. *
  563. * ext3_read_inode will return a bad_inode if the inode had been
  564. * deleted, so we should be safe.
  565. *
  566. * Currently we don't know the generation for parent directory, so
  567. * a generation of 0 means "accept any"
  568. */
  569. inode = ext3_iget(sb, ino);
  570. if (IS_ERR(inode))
  571. return ERR_CAST(inode);
  572. if (generation && inode->i_generation != generation) {
  573. iput(inode);
  574. return ERR_PTR(-ESTALE);
  575. }
  576. return inode;
  577. }
  578. static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid,
  579. int fh_len, int fh_type)
  580. {
  581. return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
  582. ext3_nfs_get_inode);
  583. }
  584. static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid,
  585. int fh_len, int fh_type)
  586. {
  587. return generic_fh_to_parent(sb, fid, fh_len, fh_type,
  588. ext3_nfs_get_inode);
  589. }
  590. #ifdef CONFIG_QUOTA
  591. #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
  592. #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
  593. static int ext3_dquot_initialize(struct inode *inode, int type);
  594. static int ext3_dquot_drop(struct inode *inode);
  595. static int ext3_write_dquot(struct dquot *dquot);
  596. static int ext3_acquire_dquot(struct dquot *dquot);
  597. static int ext3_release_dquot(struct dquot *dquot);
  598. static int ext3_mark_dquot_dirty(struct dquot *dquot);
  599. static int ext3_write_info(struct super_block *sb, int type);
  600. static int ext3_quota_on(struct super_block *sb, int type, int format_id,
  601. char *path, int remount);
  602. static int ext3_quota_on_mount(struct super_block *sb, int type);
  603. static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
  604. size_t len, loff_t off);
  605. static ssize_t ext3_quota_write(struct super_block *sb, int type,
  606. const char *data, size_t len, loff_t off);
  607. static struct dquot_operations ext3_quota_operations = {
  608. .initialize = ext3_dquot_initialize,
  609. .drop = ext3_dquot_drop,
  610. .alloc_space = dquot_alloc_space,
  611. .alloc_inode = dquot_alloc_inode,
  612. .free_space = dquot_free_space,
  613. .free_inode = dquot_free_inode,
  614. .transfer = dquot_transfer,
  615. .write_dquot = ext3_write_dquot,
  616. .acquire_dquot = ext3_acquire_dquot,
  617. .release_dquot = ext3_release_dquot,
  618. .mark_dirty = ext3_mark_dquot_dirty,
  619. .write_info = ext3_write_info,
  620. .alloc_dquot = dquot_alloc,
  621. .destroy_dquot = dquot_destroy,
  622. };
  623. static struct quotactl_ops ext3_qctl_operations = {
  624. .quota_on = ext3_quota_on,
  625. .quota_off = vfs_quota_off,
  626. .quota_sync = vfs_quota_sync,
  627. .get_info = vfs_get_dqinfo,
  628. .set_info = vfs_set_dqinfo,
  629. .get_dqblk = vfs_get_dqblk,
  630. .set_dqblk = vfs_set_dqblk
  631. };
  632. #endif
  633. static const struct super_operations ext3_sops = {
  634. .alloc_inode = ext3_alloc_inode,
  635. .destroy_inode = ext3_destroy_inode,
  636. .write_inode = ext3_write_inode,
  637. .dirty_inode = ext3_dirty_inode,
  638. .delete_inode = ext3_delete_inode,
  639. .put_super = ext3_put_super,
  640. .write_super = ext3_write_super,
  641. .sync_fs = ext3_sync_fs,
  642. .write_super_lockfs = ext3_write_super_lockfs,
  643. .unlockfs = ext3_unlockfs,
  644. .statfs = ext3_statfs,
  645. .remount_fs = ext3_remount,
  646. .clear_inode = ext3_clear_inode,
  647. .show_options = ext3_show_options,
  648. #ifdef CONFIG_QUOTA
  649. .quota_read = ext3_quota_read,
  650. .quota_write = ext3_quota_write,
  651. #endif
  652. };
  653. static const struct export_operations ext3_export_ops = {
  654. .fh_to_dentry = ext3_fh_to_dentry,
  655. .fh_to_parent = ext3_fh_to_parent,
  656. .get_parent = ext3_get_parent,
  657. };
  658. enum {
  659. Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
  660. Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
  661. Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
  662. Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
  663. Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
  664. Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
  665. Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
  666. Opt_data_err_abort, Opt_data_err_ignore,
  667. Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
  668. Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
  669. Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
  670. Opt_grpquota
  671. };
  672. static const match_table_t tokens = {
  673. {Opt_bsd_df, "bsddf"},
  674. {Opt_minix_df, "minixdf"},
  675. {Opt_grpid, "grpid"},
  676. {Opt_grpid, "bsdgroups"},
  677. {Opt_nogrpid, "nogrpid"},
  678. {Opt_nogrpid, "sysvgroups"},
  679. {Opt_resgid, "resgid=%u"},
  680. {Opt_resuid, "resuid=%u"},
  681. {Opt_sb, "sb=%u"},
  682. {Opt_err_cont, "errors=continue"},
  683. {Opt_err_panic, "errors=panic"},
  684. {Opt_err_ro, "errors=remount-ro"},
  685. {Opt_nouid32, "nouid32"},
  686. {Opt_nocheck, "nocheck"},
  687. {Opt_nocheck, "check=none"},
  688. {Opt_debug, "debug"},
  689. {Opt_oldalloc, "oldalloc"},
  690. {Opt_orlov, "orlov"},
  691. {Opt_user_xattr, "user_xattr"},
  692. {Opt_nouser_xattr, "nouser_xattr"},
  693. {Opt_acl, "acl"},
  694. {Opt_noacl, "noacl"},
  695. {Opt_reservation, "reservation"},
  696. {Opt_noreservation, "noreservation"},
  697. {Opt_noload, "noload"},
  698. {Opt_nobh, "nobh"},
  699. {Opt_bh, "bh"},
  700. {Opt_commit, "commit=%u"},
  701. {Opt_journal_update, "journal=update"},
  702. {Opt_journal_inum, "journal=%u"},
  703. {Opt_journal_dev, "journal_dev=%u"},
  704. {Opt_abort, "abort"},
  705. {Opt_data_journal, "data=journal"},
  706. {Opt_data_ordered, "data=ordered"},
  707. {Opt_data_writeback, "data=writeback"},
  708. {Opt_data_err_abort, "data_err=abort"},
  709. {Opt_data_err_ignore, "data_err=ignore"},
  710. {Opt_offusrjquota, "usrjquota="},
  711. {Opt_usrjquota, "usrjquota=%s"},
  712. {Opt_offgrpjquota, "grpjquota="},
  713. {Opt_grpjquota, "grpjquota=%s"},
  714. {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
  715. {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
  716. {Opt_grpquota, "grpquota"},
  717. {Opt_noquota, "noquota"},
  718. {Opt_quota, "quota"},
  719. {Opt_usrquota, "usrquota"},
  720. {Opt_barrier, "barrier=%u"},
  721. {Opt_resize, "resize"},
  722. {Opt_err, NULL},
  723. };
  724. static ext3_fsblk_t get_sb_block(void **data)
  725. {
  726. ext3_fsblk_t sb_block;
  727. char *options = (char *) *data;
  728. if (!options || strncmp(options, "sb=", 3) != 0)
  729. return 1; /* Default location */
  730. options += 3;
  731. /*todo: use simple_strtoll with >32bit ext3 */
  732. sb_block = simple_strtoul(options, &options, 0);
  733. if (*options && *options != ',') {
  734. printk("EXT3-fs: Invalid sb specification: %s\n",
  735. (char *) *data);
  736. return 1;
  737. }
  738. if (*options == ',')
  739. options++;
  740. *data = (void *) options;
  741. return sb_block;
  742. }
  743. static int parse_options (char *options, struct super_block *sb,
  744. unsigned int *inum, unsigned long *journal_devnum,
  745. ext3_fsblk_t *n_blocks_count, int is_remount)
  746. {
  747. struct ext3_sb_info *sbi = EXT3_SB(sb);
  748. char * p;
  749. substring_t args[MAX_OPT_ARGS];
  750. int data_opt = 0;
  751. int option;
  752. #ifdef CONFIG_QUOTA
  753. int qtype, qfmt;
  754. char *qname;
  755. #endif
  756. if (!options)
  757. return 1;
  758. while ((p = strsep (&options, ",")) != NULL) {
  759. int token;
  760. if (!*p)
  761. continue;
  762. token = match_token(p, tokens, args);
  763. switch (token) {
  764. case Opt_bsd_df:
  765. clear_opt (sbi->s_mount_opt, MINIX_DF);
  766. break;
  767. case Opt_minix_df:
  768. set_opt (sbi->s_mount_opt, MINIX_DF);
  769. break;
  770. case Opt_grpid:
  771. set_opt (sbi->s_mount_opt, GRPID);
  772. break;
  773. case Opt_nogrpid:
  774. clear_opt (sbi->s_mount_opt, GRPID);
  775. break;
  776. case Opt_resuid:
  777. if (match_int(&args[0], &option))
  778. return 0;
  779. sbi->s_resuid = option;
  780. break;
  781. case Opt_resgid:
  782. if (match_int(&args[0], &option))
  783. return 0;
  784. sbi->s_resgid = option;
  785. break;
  786. case Opt_sb:
  787. /* handled by get_sb_block() instead of here */
  788. /* *sb_block = match_int(&args[0]); */
  789. break;
  790. case Opt_err_panic:
  791. clear_opt (sbi->s_mount_opt, ERRORS_CONT);
  792. clear_opt (sbi->s_mount_opt, ERRORS_RO);
  793. set_opt (sbi->s_mount_opt, ERRORS_PANIC);
  794. break;
  795. case Opt_err_ro:
  796. clear_opt (sbi->s_mount_opt, ERRORS_CONT);
  797. clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
  798. set_opt (sbi->s_mount_opt, ERRORS_RO);
  799. break;
  800. case Opt_err_cont:
  801. clear_opt (sbi->s_mount_opt, ERRORS_RO);
  802. clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
  803. set_opt (sbi->s_mount_opt, ERRORS_CONT);
  804. break;
  805. case Opt_nouid32:
  806. set_opt (sbi->s_mount_opt, NO_UID32);
  807. break;
  808. case Opt_nocheck:
  809. clear_opt (sbi->s_mount_opt, CHECK);
  810. break;
  811. case Opt_debug:
  812. set_opt (sbi->s_mount_opt, DEBUG);
  813. break;
  814. case Opt_oldalloc:
  815. set_opt (sbi->s_mount_opt, OLDALLOC);
  816. break;
  817. case Opt_orlov:
  818. clear_opt (sbi->s_mount_opt, OLDALLOC);
  819. break;
  820. #ifdef CONFIG_EXT3_FS_XATTR
  821. case Opt_user_xattr:
  822. set_opt (sbi->s_mount_opt, XATTR_USER);
  823. break;
  824. case Opt_nouser_xattr:
  825. clear_opt (sbi->s_mount_opt, XATTR_USER);
  826. break;
  827. #else
  828. case Opt_user_xattr:
  829. case Opt_nouser_xattr:
  830. printk("EXT3 (no)user_xattr options not supported\n");
  831. break;
  832. #endif
  833. #ifdef CONFIG_EXT3_FS_POSIX_ACL
  834. case Opt_acl:
  835. set_opt(sbi->s_mount_opt, POSIX_ACL);
  836. break;
  837. case Opt_noacl:
  838. clear_opt(sbi->s_mount_opt, POSIX_ACL);
  839. break;
  840. #else
  841. case Opt_acl:
  842. case Opt_noacl:
  843. printk("EXT3 (no)acl options not supported\n");
  844. break;
  845. #endif
  846. case Opt_reservation:
  847. set_opt(sbi->s_mount_opt, RESERVATION);
  848. break;
  849. case Opt_noreservation:
  850. clear_opt(sbi->s_mount_opt, RESERVATION);
  851. break;
  852. case Opt_journal_update:
  853. /* @@@ FIXME */
  854. /* Eventually we will want to be able to create
  855. a journal file here. For now, only allow the
  856. user to specify an existing inode to be the
  857. journal file. */
  858. if (is_remount) {
  859. printk(KERN_ERR "EXT3-fs: cannot specify "
  860. "journal on remount\n");
  861. return 0;
  862. }
  863. set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
  864. break;
  865. case Opt_journal_inum:
  866. if (is_remount) {
  867. printk(KERN_ERR "EXT3-fs: cannot specify "
  868. "journal on remount\n");
  869. return 0;
  870. }
  871. if (match_int(&args[0], &option))
  872. return 0;
  873. *inum = option;
  874. break;
  875. case Opt_journal_dev:
  876. if (is_remount) {
  877. printk(KERN_ERR "EXT3-fs: cannot specify "
  878. "journal on remount\n");
  879. return 0;
  880. }
  881. if (match_int(&args[0], &option))
  882. return 0;
  883. *journal_devnum = option;
  884. break;
  885. case Opt_noload:
  886. set_opt (sbi->s_mount_opt, NOLOAD);
  887. break;
  888. case Opt_commit:
  889. if (match_int(&args[0], &option))
  890. return 0;
  891. if (option < 0)
  892. return 0;
  893. if (option == 0)
  894. option = JBD_DEFAULT_MAX_COMMIT_AGE;
  895. sbi->s_commit_interval = HZ * option;
  896. break;
  897. case Opt_data_journal:
  898. data_opt = EXT3_MOUNT_JOURNAL_DATA;
  899. goto datacheck;
  900. case Opt_data_ordered:
  901. data_opt = EXT3_MOUNT_ORDERED_DATA;
  902. goto datacheck;
  903. case Opt_data_writeback:
  904. data_opt = EXT3_MOUNT_WRITEBACK_DATA;
  905. datacheck:
  906. if (is_remount) {
  907. if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
  908. != data_opt) {
  909. printk(KERN_ERR
  910. "EXT3-fs: cannot change data "
  911. "mode on remount\n");
  912. return 0;
  913. }
  914. } else {
  915. sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
  916. sbi->s_mount_opt |= data_opt;
  917. }
  918. break;
  919. case Opt_data_err_abort:
  920. set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
  921. break;
  922. case Opt_data_err_ignore:
  923. clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
  924. break;
  925. #ifdef CONFIG_QUOTA
  926. case Opt_usrjquota:
  927. qtype = USRQUOTA;
  928. goto set_qf_name;
  929. case Opt_grpjquota:
  930. qtype = GRPQUOTA;
  931. set_qf_name:
  932. if (sb_any_quota_loaded(sb) &&
  933. !sbi->s_qf_names[qtype]) {
  934. printk(KERN_ERR
  935. "EXT3-fs: Cannot change journaled "
  936. "quota options when quota turned on.\n");
  937. return 0;
  938. }
  939. qname = match_strdup(&args[0]);
  940. if (!qname) {
  941. printk(KERN_ERR
  942. "EXT3-fs: not enough memory for "
  943. "storing quotafile name.\n");
  944. return 0;
  945. }
  946. if (sbi->s_qf_names[qtype] &&
  947. strcmp(sbi->s_qf_names[qtype], qname)) {
  948. printk(KERN_ERR
  949. "EXT3-fs: %s quota file already "
  950. "specified.\n", QTYPE2NAME(qtype));
  951. kfree(qname);
  952. return 0;
  953. }
  954. sbi->s_qf_names[qtype] = qname;
  955. if (strchr(sbi->s_qf_names[qtype], '/')) {
  956. printk(KERN_ERR
  957. "EXT3-fs: quotafile must be on "
  958. "filesystem root.\n");
  959. kfree(sbi->s_qf_names[qtype]);
  960. sbi->s_qf_names[qtype] = NULL;
  961. return 0;
  962. }
  963. set_opt(sbi->s_mount_opt, QUOTA);
  964. break;
  965. case Opt_offusrjquota:
  966. qtype = USRQUOTA;
  967. goto clear_qf_name;
  968. case Opt_offgrpjquota:
  969. qtype = GRPQUOTA;
  970. clear_qf_name:
  971. if (sb_any_quota_loaded(sb) &&
  972. sbi->s_qf_names[qtype]) {
  973. printk(KERN_ERR "EXT3-fs: Cannot change "
  974. "journaled quota options when "
  975. "quota turned on.\n");
  976. return 0;
  977. }
  978. /*
  979. * The space will be released later when all options
  980. * are confirmed to be correct
  981. */
  982. sbi->s_qf_names[qtype] = NULL;
  983. break;
  984. case Opt_jqfmt_vfsold:
  985. qfmt = QFMT_VFS_OLD;
  986. goto set_qf_format;
  987. case Opt_jqfmt_vfsv0:
  988. qfmt = QFMT_VFS_V0;
  989. set_qf_format:
  990. if (sb_any_quota_loaded(sb) &&
  991. sbi->s_jquota_fmt != qfmt) {
  992. printk(KERN_ERR "EXT3-fs: Cannot change "
  993. "journaled quota options when "
  994. "quota turned on.\n");
  995. return 0;
  996. }
  997. sbi->s_jquota_fmt = qfmt;
  998. break;
  999. case Opt_quota:
  1000. case Opt_usrquota:
  1001. set_opt(sbi->s_mount_opt, QUOTA);
  1002. set_opt(sbi->s_mount_opt, USRQUOTA);
  1003. break;
  1004. case Opt_grpquota:
  1005. set_opt(sbi->s_mount_opt, QUOTA);
  1006. set_opt(sbi->s_mount_opt, GRPQUOTA);
  1007. break;
  1008. case Opt_noquota:
  1009. if (sb_any_quota_loaded(sb)) {
  1010. printk(KERN_ERR "EXT3-fs: Cannot change quota "
  1011. "options when quota turned on.\n");
  1012. return 0;
  1013. }
  1014. clear_opt(sbi->s_mount_opt, QUOTA);
  1015. clear_opt(sbi->s_mount_opt, USRQUOTA);
  1016. clear_opt(sbi->s_mount_opt, GRPQUOTA);
  1017. break;
  1018. #else
  1019. case Opt_quota:
  1020. case Opt_usrquota:
  1021. case Opt_grpquota:
  1022. printk(KERN_ERR
  1023. "EXT3-fs: quota options not supported.\n");
  1024. break;
  1025. case Opt_usrjquota:
  1026. case Opt_grpjquota:
  1027. case Opt_offusrjquota:
  1028. case Opt_offgrpjquota:
  1029. case Opt_jqfmt_vfsold:
  1030. case Opt_jqfmt_vfsv0:
  1031. printk(KERN_ERR
  1032. "EXT3-fs: journaled quota options not "
  1033. "supported.\n");
  1034. break;
  1035. case Opt_noquota:
  1036. break;
  1037. #endif
  1038. case Opt_abort:
  1039. set_opt(sbi->s_mount_opt, ABORT);
  1040. break;
  1041. case Opt_barrier:
  1042. if (match_int(&args[0], &option))
  1043. return 0;
  1044. if (option)
  1045. set_opt(sbi->s_mount_opt, BARRIER);
  1046. else
  1047. clear_opt(sbi->s_mount_opt, BARRIER);
  1048. break;
  1049. case Opt_ignore:
  1050. break;
  1051. case Opt_resize:
  1052. if (!is_remount) {
  1053. printk("EXT3-fs: resize option only available "
  1054. "for remount\n");
  1055. return 0;
  1056. }
  1057. if (match_int(&args[0], &option) != 0)
  1058. return 0;
  1059. *n_blocks_count = option;
  1060. break;
  1061. case Opt_nobh:
  1062. set_opt(sbi->s_mount_opt, NOBH);
  1063. break;
  1064. case Opt_bh:
  1065. clear_opt(sbi->s_mount_opt, NOBH);
  1066. break;
  1067. default:
  1068. printk (KERN_ERR
  1069. "EXT3-fs: Unrecognized mount option \"%s\" "
  1070. "or missing value\n", p);
  1071. return 0;
  1072. }
  1073. }
  1074. #ifdef CONFIG_QUOTA
  1075. if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
  1076. if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) &&
  1077. sbi->s_qf_names[USRQUOTA])
  1078. clear_opt(sbi->s_mount_opt, USRQUOTA);
  1079. if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
  1080. sbi->s_qf_names[GRPQUOTA])
  1081. clear_opt(sbi->s_mount_opt, GRPQUOTA);
  1082. if ((sbi->s_qf_names[USRQUOTA] &&
  1083. (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
  1084. (sbi->s_qf_names[GRPQUOTA] &&
  1085. (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
  1086. printk(KERN_ERR "EXT3-fs: old and new quota "
  1087. "format mixing.\n");
  1088. return 0;
  1089. }
  1090. if (!sbi->s_jquota_fmt) {
  1091. printk(KERN_ERR "EXT3-fs: journaled quota format "
  1092. "not specified.\n");
  1093. return 0;
  1094. }
  1095. } else {
  1096. if (sbi->s_jquota_fmt) {
  1097. printk(KERN_ERR "EXT3-fs: journaled quota format "
  1098. "specified with no journaling "
  1099. "enabled.\n");
  1100. return 0;
  1101. }
  1102. }
  1103. #endif
  1104. return 1;
  1105. }
  1106. static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
  1107. int read_only)
  1108. {
  1109. struct ext3_sb_info *sbi = EXT3_SB(sb);
  1110. int res = 0;
  1111. if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
  1112. printk (KERN_ERR "EXT3-fs warning: revision level too high, "
  1113. "forcing read-only mode\n");
  1114. res = MS_RDONLY;
  1115. }
  1116. if (read_only)
  1117. return res;
  1118. if (!(sbi->s_mount_state & EXT3_VALID_FS))
  1119. printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, "
  1120. "running e2fsck is recommended\n");
  1121. else if ((sbi->s_mount_state & EXT3_ERROR_FS))
  1122. printk (KERN_WARNING
  1123. "EXT3-fs warning: mounting fs with errors, "
  1124. "running e2fsck is recommended\n");
  1125. else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
  1126. le16_to_cpu(es->s_mnt_count) >=
  1127. (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
  1128. printk (KERN_WARNING
  1129. "EXT3-fs warning: maximal mount count reached, "
  1130. "running e2fsck is recommended\n");
  1131. else if (le32_to_cpu(es->s_checkinterval) &&
  1132. (le32_to_cpu(es->s_lastcheck) +
  1133. le32_to_cpu(es->s_checkinterval) <= get_seconds()))
  1134. printk (KERN_WARNING
  1135. "EXT3-fs warning: checktime reached, "
  1136. "running e2fsck is recommended\n");
  1137. #if 0
  1138. /* @@@ We _will_ want to clear the valid bit if we find
  1139. inconsistencies, to force a fsck at reboot. But for
  1140. a plain journaled filesystem we can keep it set as
  1141. valid forever! :) */
  1142. es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
  1143. #endif
  1144. if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
  1145. es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
  1146. le16_add_cpu(&es->s_mnt_count, 1);
  1147. es->s_mtime = cpu_to_le32(get_seconds());
  1148. ext3_update_dynamic_rev(sb);
  1149. EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
  1150. ext3_commit_super(sb, es, 1);
  1151. if (test_opt(sb, DEBUG))
  1152. printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, "
  1153. "bpg=%lu, ipg=%lu, mo=%04lx]\n",
  1154. sb->s_blocksize,
  1155. sbi->s_groups_count,
  1156. EXT3_BLOCKS_PER_GROUP(sb),
  1157. EXT3_INODES_PER_GROUP(sb),
  1158. sbi->s_mount_opt);
  1159. printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id);
  1160. if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
  1161. char b[BDEVNAME_SIZE];
  1162. printk("external journal on %s\n",
  1163. bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
  1164. } else {
  1165. printk("internal journal\n");
  1166. }
  1167. return res;
  1168. }
  1169. /* Called at mount-time, super-block is locked */
  1170. static int ext3_check_descriptors(struct super_block *sb)
  1171. {
  1172. struct ext3_sb_info *sbi = EXT3_SB(sb);
  1173. int i;
  1174. ext3_debug ("Checking group descriptors");
  1175. for (i = 0; i < sbi->s_groups_count; i++) {
  1176. struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL);
  1177. ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i);
  1178. ext3_fsblk_t last_block;
  1179. if (i == sbi->s_groups_count - 1)
  1180. last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
  1181. else
  1182. last_block = first_block +
  1183. (EXT3_BLOCKS_PER_GROUP(sb) - 1);
  1184. if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
  1185. le32_to_cpu(gdp->bg_block_bitmap) > last_block)
  1186. {
  1187. ext3_error (sb, "ext3_check_descriptors",
  1188. "Block bitmap for group %d"
  1189. " not in group (block %lu)!",
  1190. i, (unsigned long)
  1191. le32_to_cpu(gdp->bg_block_bitmap));
  1192. return 0;
  1193. }
  1194. if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
  1195. le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
  1196. {
  1197. ext3_error (sb, "ext3_check_descriptors",
  1198. "Inode bitmap for group %d"
  1199. " not in group (block %lu)!",
  1200. i, (unsigned long)
  1201. le32_to_cpu(gdp->bg_inode_bitmap));
  1202. return 0;
  1203. }
  1204. if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
  1205. le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
  1206. last_block)
  1207. {
  1208. ext3_error (sb, "ext3_check_descriptors",
  1209. "Inode table for group %d"
  1210. " not in group (block %lu)!",
  1211. i, (unsigned long)
  1212. le32_to_cpu(gdp->bg_inode_table));
  1213. return 0;
  1214. }
  1215. }
  1216. sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
  1217. sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
  1218. return 1;
  1219. }
  1220. /* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
  1221. * the superblock) which were deleted from all directories, but held open by
  1222. * a process at the time of a crash. We walk the list and try to delete these
  1223. * inodes at recovery time (only with a read-write filesystem).
  1224. *
  1225. * In order to keep the orphan inode chain consistent during traversal (in
  1226. * case of crash during recovery), we link each inode into the superblock
  1227. * orphan list_head and handle it the same way as an inode deletion during
  1228. * normal operation (which journals the operations for us).
  1229. *
  1230. * We only do an iget() and an iput() on each inode, which is very safe if we
  1231. * accidentally point at an in-use or already deleted inode. The worst that
  1232. * can happen in this case is that we get a "bit already cleared" message from
  1233. * ext3_free_inode(). The only reason we would point at a wrong inode is if
  1234. * e2fsck was run on this filesystem, and it must have already done the orphan
  1235. * inode cleanup for us, so we can safely abort without any further action.
  1236. */
  1237. static void ext3_orphan_cleanup (struct super_block * sb,
  1238. struct ext3_super_block * es)
  1239. {
  1240. unsigned int s_flags = sb->s_flags;
  1241. int nr_orphans = 0, nr_truncates = 0;
  1242. #ifdef CONFIG_QUOTA
  1243. int i;
  1244. #endif
  1245. if (!es->s_last_orphan) {
  1246. jbd_debug(4, "no orphan inodes to clean up\n");
  1247. return;
  1248. }
  1249. if (bdev_read_only(sb->s_bdev)) {
  1250. printk(KERN_ERR "EXT3-fs: write access "
  1251. "unavailable, skipping orphan cleanup.\n");
  1252. return;
  1253. }
  1254. if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
  1255. if (es->s_last_orphan)
  1256. jbd_debug(1, "Errors on filesystem, "
  1257. "clearing orphan list.\n");
  1258. es->s_last_orphan = 0;
  1259. jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
  1260. return;
  1261. }
  1262. if (s_flags & MS_RDONLY) {
  1263. printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n",
  1264. sb->s_id);
  1265. sb->s_flags &= ~MS_RDONLY;
  1266. }
  1267. #ifdef CONFIG_QUOTA
  1268. /* Needed for iput() to work correctly and not trash data */
  1269. sb->s_flags |= MS_ACTIVE;
  1270. /* Turn on quotas so that they are updated correctly */
  1271. for (i = 0; i < MAXQUOTAS; i++) {
  1272. if (EXT3_SB(sb)->s_qf_names[i]) {
  1273. int ret = ext3_quota_on_mount(sb, i);
  1274. if (ret < 0)
  1275. printk(KERN_ERR
  1276. "EXT3-fs: Cannot turn on journaled "
  1277. "quota: error %d\n", ret);
  1278. }
  1279. }
  1280. #endif
  1281. while (es->s_last_orphan) {
  1282. struct inode *inode;
  1283. inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
  1284. if (IS_ERR(inode)) {
  1285. es->s_last_orphan = 0;
  1286. break;
  1287. }
  1288. list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
  1289. DQUOT_INIT(inode);
  1290. if (inode->i_nlink) {
  1291. printk(KERN_DEBUG
  1292. "%s: truncating inode %lu to %Ld bytes\n",
  1293. __func__, inode->i_ino, inode->i_size);
  1294. jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
  1295. inode->i_ino, inode->i_size);
  1296. ext3_truncate(inode);
  1297. nr_truncates++;
  1298. } else {
  1299. printk(KERN_DEBUG
  1300. "%s: deleting unreferenced inode %lu\n",
  1301. __func__, inode->i_ino);
  1302. jbd_debug(2, "deleting unreferenced inode %lu\n",
  1303. inode->i_ino);
  1304. nr_orphans++;
  1305. }
  1306. iput(inode); /* The delete magic happens here! */
  1307. }
  1308. #define PLURAL(x) (x), ((x)==1) ? "" : "s"
  1309. if (nr_orphans)
  1310. printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n",
  1311. sb->s_id, PLURAL(nr_orphans));
  1312. if (nr_truncates)
  1313. printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n",
  1314. sb->s_id, PLURAL(nr_truncates));
  1315. #ifdef CONFIG_QUOTA
  1316. /* Turn quotas off */
  1317. for (i = 0; i < MAXQUOTAS; i++) {
  1318. if (sb_dqopt(sb)->files[i])
  1319. vfs_quota_off(sb, i, 0);
  1320. }
  1321. #endif
  1322. sb->s_flags = s_flags; /* Restore MS_RDONLY status */
  1323. }
  1324. /*
  1325. * Maximal file size. There is a direct, and {,double-,triple-}indirect
  1326. * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
  1327. * We need to be 1 filesystem block less than the 2^32 sector limit.
  1328. */
  1329. static loff_t ext3_max_size(int bits)
  1330. {
  1331. loff_t res = EXT3_NDIR_BLOCKS;
  1332. int meta_blocks;
  1333. loff_t upper_limit;
  1334. /* This is calculated to be the largest file size for a
  1335. * dense, file such that the total number of
  1336. * sectors in the file, including data and all indirect blocks,
  1337. * does not exceed 2^32 -1
  1338. * __u32 i_blocks representing the total number of
  1339. * 512 bytes blocks of the file
  1340. */
  1341. upper_limit = (1LL << 32) - 1;
  1342. /* total blocks in file system block size */
  1343. upper_limit >>= (bits - 9);
  1344. /* indirect blocks */
  1345. meta_blocks = 1;
  1346. /* double indirect blocks */
  1347. meta_blocks += 1 + (1LL << (bits-2));
  1348. /* tripple indirect blocks */
  1349. meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
  1350. upper_limit -= meta_blocks;
  1351. upper_limit <<= bits;
  1352. res += 1LL << (bits-2);
  1353. res += 1LL << (2*(bits-2));
  1354. res += 1LL << (3*(bits-2));
  1355. res <<= bits;
  1356. if (res > upper_limit)
  1357. res = upper_limit;
  1358. if (res > MAX_LFS_FILESIZE)
  1359. res = MAX_LFS_FILESIZE;
  1360. return res;
  1361. }
  1362. static ext3_fsblk_t descriptor_loc(struct super_block *sb,
  1363. ext3_fsblk_t logic_sb_block,
  1364. int nr)
  1365. {
  1366. struct ext3_sb_info *sbi = EXT3_SB(sb);
  1367. unsigned long bg, first_meta_bg;
  1368. int has_super = 0;
  1369. first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
  1370. if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
  1371. nr < first_meta_bg)
  1372. return (logic_sb_block + nr + 1);
  1373. bg = sbi->s_desc_per_block * nr;
  1374. if (ext3_bg_has_super(sb, bg))
  1375. has_super = 1;
  1376. return (has_super + ext3_group_first_block_no(sb, bg));
  1377. }
  1378. static int ext3_fill_super (struct super_block *sb, void *data, int silent)
  1379. {
  1380. struct buffer_head * bh;
  1381. struct ext3_super_block *es = NULL;
  1382. struct ext3_sb_info *sbi;
  1383. ext3_fsblk_t block;
  1384. ext3_fsblk_t sb_block = get_sb_block(&data);
  1385. ext3_fsblk_t logic_sb_block;
  1386. unsigned long offset = 0;
  1387. unsigned int journal_inum = 0;
  1388. unsigned long journal_devnum = 0;
  1389. unsigned long def_mount_opts;
  1390. struct inode *root;
  1391. int blocksize;
  1392. int hblock;
  1393. int db_count;
  1394. int i;
  1395. int needs_recovery;
  1396. int ret = -EINVAL;
  1397. __le32 features;
  1398. int err;
  1399. sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
  1400. if (!sbi)
  1401. return -ENOMEM;
  1402. sb->s_fs_info = sbi;
  1403. sbi->s_mount_opt = 0;
  1404. sbi->s_resuid = EXT3_DEF_RESUID;
  1405. sbi->s_resgid = EXT3_DEF_RESGID;
  1406. sbi->s_sb_block = sb_block;
  1407. unlock_kernel();
  1408. blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
  1409. if (!blocksize) {
  1410. printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
  1411. goto out_fail;
  1412. }
  1413. /*
  1414. * The ext3 superblock will not be buffer aligned for other than 1kB
  1415. * block sizes. We need to calculate the offset from buffer start.
  1416. */
  1417. if (blocksize != EXT3_MIN_BLOCK_SIZE) {
  1418. logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
  1419. offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
  1420. } else {
  1421. logic_sb_block = sb_block;
  1422. }
  1423. if (!(bh = sb_bread(sb, logic_sb_block))) {
  1424. printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
  1425. goto out_fail;
  1426. }
  1427. /*
  1428. * Note: s_es must be initialized as soon as possible because
  1429. * some ext3 macro-instructions depend on its value
  1430. */
  1431. es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
  1432. sbi->s_es = es;
  1433. sb->s_magic = le16_to_cpu(es->s_magic);
  1434. if (sb->s_magic != EXT3_SUPER_MAGIC)
  1435. goto cantfind_ext3;
  1436. /* Set defaults before we parse the mount options */
  1437. def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
  1438. if (def_mount_opts & EXT3_DEFM_DEBUG)
  1439. set_opt(sbi->s_mount_opt, DEBUG);
  1440. if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
  1441. set_opt(sbi->s_mount_opt, GRPID);
  1442. if (def_mount_opts & EXT3_DEFM_UID16)
  1443. set_opt(sbi->s_mount_opt, NO_UID32);
  1444. #ifdef CONFIG_EXT3_FS_XATTR
  1445. if (def_mount_opts & EXT3_DEFM_XATTR_USER)
  1446. set_opt(sbi->s_mount_opt, XATTR_USER);
  1447. #endif
  1448. #ifdef CONFIG_EXT3_FS_POSIX_ACL
  1449. if (def_mount_opts & EXT3_DEFM_ACL)
  1450. set_opt(sbi->s_mount_opt, POSIX_ACL);
  1451. #endif
  1452. if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
  1453. sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
  1454. else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
  1455. sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA;
  1456. else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
  1457. sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA;
  1458. if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
  1459. set_opt(sbi->s_mount_opt, ERRORS_PANIC);
  1460. else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE)
  1461. set_opt(sbi->s_mount_opt, ERRORS_CONT);
  1462. else
  1463. set_opt(sbi->s_mount_opt, ERRORS_RO);
  1464. sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
  1465. sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
  1466. set_opt(sbi->s_mount_opt, RESERVATION);
  1467. if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
  1468. NULL, 0))
  1469. goto failed_mount;
  1470. sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
  1471. ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
  1472. if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
  1473. (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
  1474. EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
  1475. EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
  1476. printk(KERN_WARNING
  1477. "EXT3-fs warning: feature flags set on rev 0 fs, "
  1478. "running e2fsck is recommended\n");
  1479. /*
  1480. * Check feature flags regardless of the revision level, since we
  1481. * previously didn't change the revision level when setting the flags,
  1482. * so there is a chance incompat flags are set on a rev 0 filesystem.
  1483. */
  1484. features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
  1485. if (features) {
  1486. printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
  1487. "unsupported optional features (%x).\n",
  1488. sb->s_id, le32_to_cpu(features));
  1489. goto failed_mount;
  1490. }
  1491. features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
  1492. if (!(sb->s_flags & MS_RDONLY) && features) {
  1493. printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
  1494. "unsupported optional features (%x).\n",
  1495. sb->s_id, le32_to_cpu(features));
  1496. goto failed_mount;
  1497. }
  1498. blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
  1499. if (blocksize < EXT3_MIN_BLOCK_SIZE ||
  1500. blocksize > EXT3_MAX_BLOCK_SIZE) {
  1501. printk(KERN_ERR
  1502. "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
  1503. blocksize, sb->s_id);
  1504. goto failed_mount;
  1505. }
  1506. hblock = bdev_hardsect_size(sb->s_bdev);
  1507. if (sb->s_blocksize != blocksize) {
  1508. /*
  1509. * Make sure the blocksize for the filesystem is larger
  1510. * than the hardware sectorsize for the machine.
  1511. */
  1512. if (blocksize < hblock) {
  1513. printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
  1514. "device blocksize %d.\n", blocksize, hblock);
  1515. goto failed_mount;
  1516. }
  1517. brelse (bh);
  1518. if (!sb_set_blocksize(sb, blocksize)) {
  1519. printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n",
  1520. blocksize);
  1521. goto out_fail;
  1522. }
  1523. logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
  1524. offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
  1525. bh = sb_bread(sb, logic_sb_block);
  1526. if (!bh) {
  1527. printk(KERN_ERR
  1528. "EXT3-fs: Can't read superblock on 2nd try.\n");
  1529. goto failed_mount;
  1530. }
  1531. es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
  1532. sbi->s_es = es;
  1533. if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
  1534. printk (KERN_ERR
  1535. "EXT3-fs: Magic mismatch, very weird !\n");
  1536. goto failed_mount;
  1537. }
  1538. }
  1539. sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
  1540. if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
  1541. sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
  1542. sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
  1543. } else {
  1544. sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
  1545. sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
  1546. if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
  1547. (!is_power_of_2(sbi->s_inode_size)) ||
  1548. (sbi->s_inode_size > blocksize)) {
  1549. printk (KERN_ERR
  1550. "EXT3-fs: unsupported inode size: %d\n",
  1551. sbi->s_inode_size);
  1552. goto failed_mount;
  1553. }
  1554. }
  1555. sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
  1556. le32_to_cpu(es->s_log_frag_size);
  1557. if (blocksize != sbi->s_frag_size) {
  1558. printk(KERN_ERR
  1559. "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
  1560. sbi->s_frag_size, blocksize);
  1561. goto failed_mount;
  1562. }
  1563. sbi->s_frags_per_block = 1;
  1564. sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
  1565. sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
  1566. sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
  1567. if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0)
  1568. goto cantfind_ext3;
  1569. sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
  1570. if (sbi->s_inodes_per_block == 0)
  1571. goto cantfind_ext3;
  1572. sbi->s_itb_per_group = sbi->s_inodes_per_group /
  1573. sbi->s_inodes_per_block;
  1574. sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
  1575. sbi->s_sbh = bh;
  1576. sbi->s_mount_state = le16_to_cpu(es->s_state);
  1577. sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
  1578. sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
  1579. for (i=0; i < 4; i++)
  1580. sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
  1581. sbi->s_def_hash_version = es->s_def_hash_version;
  1582. if (sbi->s_blocks_per_group > blocksize * 8) {
  1583. printk (KERN_ERR
  1584. "EXT3-fs: #blocks per group too big: %lu\n",
  1585. sbi->s_blocks_per_group);
  1586. goto failed_mount;
  1587. }
  1588. if (sbi->s_frags_per_group > blocksize * 8) {
  1589. printk (KERN_ERR
  1590. "EXT3-fs: #fragments per group too big: %lu\n",
  1591. sbi->s_frags_per_group);
  1592. goto failed_mount;
  1593. }
  1594. if (sbi->s_inodes_per_group > blocksize * 8) {
  1595. printk (KERN_ERR
  1596. "EXT3-fs: #inodes per group too big: %lu\n",
  1597. sbi->s_inodes_per_group);
  1598. goto failed_mount;
  1599. }
  1600. if (le32_to_cpu(es->s_blocks_count) >
  1601. (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
  1602. printk(KERN_ERR "EXT3-fs: filesystem on %s:"
  1603. " too large to mount safely\n", sb->s_id);
  1604. if (sizeof(sector_t) < 8)
  1605. printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not "
  1606. "enabled\n");
  1607. goto failed_mount;
  1608. }
  1609. if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
  1610. goto cantfind_ext3;
  1611. sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
  1612. le32_to_cpu(es->s_first_data_block) - 1)
  1613. / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
  1614. db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
  1615. EXT3_DESC_PER_BLOCK(sb);
  1616. sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
  1617. GFP_KERNEL);
  1618. if (sbi->s_group_desc == NULL) {
  1619. printk (KERN_ERR "EXT3-fs: not enough memory\n");
  1620. goto failed_mount;
  1621. }
  1622. bgl_lock_init(&sbi->s_blockgroup_lock);
  1623. for (i = 0; i < db_count; i++) {
  1624. block = descriptor_loc(sb, logic_sb_block, i);
  1625. sbi->s_group_desc[i] = sb_bread(sb, block);
  1626. if (!sbi->s_group_desc[i]) {
  1627. printk (KERN_ERR "EXT3-fs: "
  1628. "can't read group descriptor %d\n", i);
  1629. db_count = i;
  1630. goto failed_mount2;
  1631. }
  1632. }
  1633. if (!ext3_check_descriptors (sb)) {
  1634. printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n");
  1635. goto failed_mount2;
  1636. }
  1637. sbi->s_gdb_count = db_count;
  1638. get_random_bytes(&sbi->s_next_generation, sizeof(u32));
  1639. spin_lock_init(&sbi->s_next_gen_lock);
  1640. err = percpu_counter_init(&sbi->s_freeblocks_counter,
  1641. ext3_count_free_blocks(sb));
  1642. if (!err) {
  1643. err = percpu_counter_init(&sbi->s_freeinodes_counter,
  1644. ext3_count_free_inodes(sb));
  1645. }
  1646. if (!err) {
  1647. err = percpu_counter_init(&sbi->s_dirs_counter,
  1648. ext3_count_dirs(sb));
  1649. }
  1650. if (err) {
  1651. printk(KERN_ERR "EXT3-fs: insufficient memory\n");
  1652. goto failed_mount3;
  1653. }
  1654. /* per fileystem reservation list head & lock */
  1655. spin_lock_init(&sbi->s_rsv_window_lock);
  1656. sbi->s_rsv_window_root = RB_ROOT;
  1657. /* Add a single, static dummy reservation to the start of the
  1658. * reservation window list --- it gives us a placeholder for
  1659. * append-at-start-of-list which makes the allocation logic
  1660. * _much_ simpler. */
  1661. sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
  1662. sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
  1663. sbi->s_rsv_window_head.rsv_alloc_hit = 0;
  1664. sbi->s_rsv_window_head.rsv_goal_size = 0;
  1665. ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
  1666. /*
  1667. * set up enough so that it can read an inode
  1668. */
  1669. sb->s_op = &ext3_sops;
  1670. sb->s_export_op = &ext3_export_ops;
  1671. sb->s_xattr = ext3_xattr_handlers;
  1672. #ifdef CONFIG_QUOTA
  1673. sb->s_qcop = &ext3_qctl_operations;
  1674. sb->dq_op = &ext3_quota_operations;
  1675. #endif
  1676. INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
  1677. sb->s_root = NULL;
  1678. needs_recovery = (es->s_last_orphan != 0 ||
  1679. EXT3_HAS_INCOMPAT_FEATURE(sb,
  1680. EXT3_FEATURE_INCOMPAT_RECOVER));
  1681. /*
  1682. * The first inode we look at is the journal inode. Don't try
  1683. * root first: it may be modified in the journal!
  1684. */
  1685. if (!test_opt(sb, NOLOAD) &&
  1686. EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
  1687. if (ext3_load_journal(sb, es, journal_devnum))
  1688. goto failed_mount3;
  1689. } else if (journal_inum) {
  1690. if (ext3_create_journal(sb, es, journal_inum))
  1691. goto failed_mount3;
  1692. } else {
  1693. if (!silent)
  1694. printk (KERN_ERR
  1695. "ext3: No journal on filesystem on %s\n",
  1696. sb->s_id);
  1697. goto failed_mount3;
  1698. }
  1699. /* We have now updated the journal if required, so we can
  1700. * validate the data journaling mode. */
  1701. switch (test_opt(sb, DATA_FLAGS)) {
  1702. case 0:
  1703. /* No mode set, assume a default based on the journal
  1704. capabilities: ORDERED_DATA if the journal can
  1705. cope, else JOURNAL_DATA */
  1706. if (journal_check_available_features
  1707. (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
  1708. set_opt(sbi->s_mount_opt, ORDERED_DATA);
  1709. else
  1710. set_opt(sbi->s_mount_opt, JOURNAL_DATA);
  1711. break;
  1712. case EXT3_MOUNT_ORDERED_DATA:
  1713. case EXT3_MOUNT_WRITEBACK_DATA:
  1714. if (!journal_check_available_features
  1715. (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
  1716. printk(KERN_ERR "EXT3-fs: Journal does not support "
  1717. "requested data journaling mode\n");
  1718. goto failed_mount4;
  1719. }
  1720. default:
  1721. break;
  1722. }
  1723. if (test_opt(sb, NOBH)) {
  1724. if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
  1725. printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
  1726. "its supported only with writeback mode\n");
  1727. clear_opt(sbi->s_mount_opt, NOBH);
  1728. }
  1729. }
  1730. /*
  1731. * The journal_load will have done any necessary log recovery,
  1732. * so we can safely mount the rest of the filesystem now.
  1733. */
  1734. root = ext3_iget(sb, EXT3_ROOT_INO);
  1735. if (IS_ERR(root)) {
  1736. printk(KERN_ERR "EXT3-fs: get root inode failed\n");
  1737. ret = PTR_ERR(root);
  1738. goto failed_mount4;
  1739. }
  1740. if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
  1741. iput(root);
  1742. printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
  1743. goto failed_mount4;
  1744. }
  1745. sb->s_root = d_alloc_root(root);
  1746. if (!sb->s_root) {
  1747. printk(KERN_ERR "EXT3-fs: get root dentry failed\n");
  1748. iput(root);
  1749. ret = -ENOMEM;
  1750. goto failed_mount4;
  1751. }
  1752. ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
  1753. /*
  1754. * akpm: core read_super() calls in here with the superblock locked.
  1755. * That deadlocks, because orphan cleanup needs to lock the superblock
  1756. * in numerous places. Here we just pop the lock - it's relatively
  1757. * harmless, because we are now ready to accept write_super() requests,
  1758. * and aviro says that's the only reason for hanging onto the
  1759. * superblock lock.
  1760. */
  1761. EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
  1762. ext3_orphan_cleanup(sb, es);
  1763. EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
  1764. if (needs_recovery)
  1765. printk (KERN_INFO "EXT3-fs: recovery complete.\n");
  1766. ext3_mark_recovery_complete(sb, es);
  1767. printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
  1768. test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
  1769. test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
  1770. "writeback");
  1771. lock_kernel();
  1772. return 0;
  1773. cantfind_ext3:
  1774. if (!silent)
  1775. printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n",
  1776. sb->s_id);
  1777. goto failed_mount;
  1778. failed_mount4:
  1779. journal_destroy(sbi->s_journal);
  1780. failed_mount3:
  1781. percpu_counter_destroy(&sbi->s_freeblocks_counter);
  1782. percpu_counter_destroy(&sbi->s_freeinodes_counter);
  1783. percpu_counter_destroy(&sbi->s_dirs_counter);
  1784. failed_mount2:
  1785. for (i = 0; i < db_count; i++)
  1786. brelse(sbi->s_group_desc[i]);
  1787. kfree(sbi->s_group_desc);
  1788. failed_mount:
  1789. #ifdef CONFIG_QUOTA
  1790. for (i = 0; i < MAXQUOTAS; i++)
  1791. kfree(sbi->s_qf_names[i]);
  1792. #endif
  1793. ext3_blkdev_remove(sbi);
  1794. brelse(bh);
  1795. out_fail:
  1796. sb->s_fs_info = NULL;
  1797. kfree(sbi);
  1798. lock_kernel();
  1799. return ret;
  1800. }
  1801. /*
  1802. * Setup any per-fs journal parameters now. We'll do this both on
  1803. * initial mount, once the journal has been initialised but before we've
  1804. * done any recovery; and again on any subsequent remount.
  1805. */
  1806. static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
  1807. {
  1808. struct ext3_sb_info *sbi = EXT3_SB(sb);
  1809. if (sbi->s_commit_interval)
  1810. journal->j_commit_interval = sbi->s_commit_interval;
  1811. /* We could also set up an ext3-specific default for the commit
  1812. * interval here, but for now we'll just fall back to the jbd
  1813. * default. */
  1814. spin_lock(&journal->j_state_lock);
  1815. if (test_opt(sb, BARRIER))
  1816. journal->j_flags |= JFS_BARRIER;
  1817. else
  1818. journal->j_flags &= ~JFS_BARRIER;
  1819. if (test_opt(sb, DATA_ERR_ABORT))
  1820. journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
  1821. else
  1822. journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
  1823. spin_unlock(&journal->j_state_lock);
  1824. }
  1825. static journal_t *ext3_get_journal(struct super_block *sb,
  1826. unsigned int journal_inum)
  1827. {
  1828. struct inode *journal_inode;
  1829. journal_t *journal;
  1830. /* First, test for the existence of a valid inode on disk. Bad
  1831. * things happen if we iget() an unused inode, as the subsequent
  1832. * iput() will try to delete it. */
  1833. journal_inode = ext3_iget(sb, journal_inum);
  1834. if (IS_ERR(journal_inode)) {
  1835. printk(KERN_ERR "EXT3-fs: no journal found.\n");
  1836. return NULL;
  1837. }
  1838. if (!journal_inode->i_nlink) {
  1839. make_bad_inode(journal_inode);
  1840. iput(journal_inode);
  1841. printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n");
  1842. return NULL;
  1843. }
  1844. jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
  1845. journal_inode, journal_inode->i_size);
  1846. if (!S_ISREG(journal_inode->i_mode)) {
  1847. printk(KERN_ERR "EXT3-fs: invalid journal inode.\n");
  1848. iput(journal_inode);
  1849. return NULL;
  1850. }
  1851. journal = journal_init_inode(journal_inode);
  1852. if (!journal) {
  1853. printk(KERN_ERR "EXT3-fs: Could not load journal inode\n");
  1854. iput(journal_inode);
  1855. return NULL;
  1856. }
  1857. journal->j_private = sb;
  1858. ext3_init_journal_params(sb, journal);
  1859. return journal;
  1860. }
  1861. static journal_t *ext3_get_dev_journal(struct super_block *sb,
  1862. dev_t j_dev)
  1863. {
  1864. struct buffer_head * bh;
  1865. journal_t *journal;
  1866. ext3_fsblk_t start;
  1867. ext3_fsblk_t len;
  1868. int hblock, blocksize;
  1869. ext3_fsblk_t sb_block;
  1870. unsigned long offset;
  1871. struct ext3_super_block * es;
  1872. struct block_device *bdev;
  1873. bdev = ext3_blkdev_get(j_dev);
  1874. if (bdev == NULL)
  1875. return NULL;
  1876. if (bd_claim(bdev, sb)) {
  1877. printk(KERN_ERR
  1878. "EXT3: failed to claim external journal device.\n");
  1879. blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
  1880. return NULL;
  1881. }
  1882. blocksize = sb->s_blocksize;
  1883. hblock = bdev_hardsect_size(bdev);
  1884. if (blocksize < hblock) {
  1885. printk(KERN_ERR
  1886. "EXT3-fs: blocksize too small for journal device.\n");
  1887. goto out_bdev;
  1888. }
  1889. sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
  1890. offset = EXT3_MIN_BLOCK_SIZE % blocksize;
  1891. set_blocksize(bdev, blocksize);
  1892. if (!(bh = __bread(bdev, sb_block, blocksize))) {
  1893. printk(KERN_ERR "EXT3-fs: couldn't read superblock of "
  1894. "external journal\n");
  1895. goto out_bdev;
  1896. }
  1897. es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
  1898. if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
  1899. !(le32_to_cpu(es->s_feature_incompat) &
  1900. EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
  1901. printk(KERN_ERR "EXT3-fs: external journal has "
  1902. "bad superblock\n");
  1903. brelse(bh);
  1904. goto out_bdev;
  1905. }
  1906. if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
  1907. printk(KERN_ERR "EXT3-fs: journal UUID does not match\n");
  1908. brelse(bh);
  1909. goto out_bdev;
  1910. }
  1911. len = le32_to_cpu(es->s_blocks_count);
  1912. start = sb_block + 1;
  1913. brelse(bh); /* we're done with the superblock */
  1914. journal = journal_init_dev(bdev, sb->s_bdev,
  1915. start, len, blocksize);
  1916. if (!journal) {
  1917. printk(KERN_ERR "EXT3-fs: failed to create device journal\n");
  1918. goto out_bdev;
  1919. }
  1920. journal->j_private = sb;
  1921. ll_rw_block(READ, 1, &journal->j_sb_buffer);
  1922. wait_on_buffer(journal->j_sb_buffer);
  1923. if (!buffer_uptodate(journal->j_sb_buffer)) {
  1924. printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
  1925. goto out_journal;
  1926. }
  1927. if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
  1928. printk(KERN_ERR "EXT3-fs: External journal has more than one "
  1929. "user (unsupported) - %d\n",
  1930. be32_to_cpu(journal->j_superblock->s_nr_users));
  1931. goto out_journal;
  1932. }
  1933. EXT3_SB(sb)->journal_bdev = bdev;
  1934. ext3_init_journal_params(sb, journal);
  1935. return journal;
  1936. out_journal:
  1937. journal_destroy(journal);
  1938. out_bdev:
  1939. ext3_blkdev_put(bdev);
  1940. return NULL;
  1941. }
  1942. static int ext3_load_journal(struct super_block *sb,
  1943. struct ext3_super_block *es,
  1944. unsigned long journal_devnum)
  1945. {
  1946. journal_t *journal;
  1947. unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
  1948. dev_t journal_dev;
  1949. int err = 0;
  1950. int really_read_only;
  1951. if (journal_devnum &&
  1952. journal_devnum != le32_to_cpu(es->s_journal_dev)) {
  1953. printk(KERN_INFO "EXT3-fs: external journal device major/minor "
  1954. "numbers have changed\n");
  1955. journal_dev = new_decode_dev(journal_devnum);
  1956. } else
  1957. journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
  1958. really_read_only = bdev_read_only(sb->s_bdev);
  1959. /*
  1960. * Are we loading a blank journal or performing recovery after a
  1961. * crash? For recovery, we need to check in advance whether we
  1962. * can get read-write access to the device.
  1963. */
  1964. if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
  1965. if (sb->s_flags & MS_RDONLY) {
  1966. printk(KERN_INFO "EXT3-fs: INFO: recovery "
  1967. "required on readonly filesystem.\n");
  1968. if (really_read_only) {
  1969. printk(KERN_ERR "EXT3-fs: write access "
  1970. "unavailable, cannot proceed.\n");
  1971. return -EROFS;
  1972. }
  1973. printk (KERN_INFO "EXT3-fs: write access will "
  1974. "be enabled during recovery.\n");
  1975. }
  1976. }
  1977. if (journal_inum && journal_dev) {
  1978. printk(KERN_ERR "EXT3-fs: filesystem has both journal "
  1979. "and inode journals!\n");
  1980. return -EINVAL;
  1981. }
  1982. if (journal_inum) {
  1983. if (!(journal = ext3_get_journal(sb, journal_inum)))
  1984. return -EINVAL;
  1985. } else {
  1986. if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
  1987. return -EINVAL;
  1988. }
  1989. if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
  1990. err = journal_update_format(journal);
  1991. if (err) {
  1992. printk(KERN_ERR "EXT3-fs: error updating journal.\n");
  1993. journal_destroy(journal);
  1994. return err;
  1995. }
  1996. }
  1997. if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
  1998. err = journal_wipe(journal, !really_read_only);
  1999. if (!err)
  2000. err = journal_load(journal);
  2001. if (err) {
  2002. printk(KERN_ERR "EXT3-fs: error loading journal.\n");
  2003. journal_destroy(journal);
  2004. return err;
  2005. }
  2006. EXT3_SB(sb)->s_journal = journal;
  2007. ext3_clear_journal_err(sb, es);
  2008. if (journal_devnum &&
  2009. journal_devnum != le32_to_cpu(es->s_journal_dev)) {
  2010. es->s_journal_dev = cpu_to_le32(journal_devnum);
  2011. sb->s_dirt = 1;
  2012. /* Make sure we flush the recovery flag to disk. */
  2013. ext3_commit_super(sb, es, 1);
  2014. }
  2015. return 0;
  2016. }
  2017. static int ext3_create_journal(struct super_block * sb,
  2018. struct ext3_super_block * es,
  2019. unsigned int journal_inum)
  2020. {
  2021. journal_t *journal;
  2022. int err;
  2023. if (sb->s_flags & MS_RDONLY) {
  2024. printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
  2025. "create journal.\n");
  2026. return -EROFS;
  2027. }
  2028. journal = ext3_get_journal(sb, journal_inum);
  2029. if (!journal)
  2030. return -EINVAL;
  2031. printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
  2032. journal_inum);
  2033. err = journal_create(journal);
  2034. if (err) {
  2035. printk(KERN_ERR "EXT3-fs: error creating journal.\n");
  2036. journal_destroy(journal);
  2037. return -EIO;
  2038. }
  2039. EXT3_SB(sb)->s_journal = journal;
  2040. ext3_update_dynamic_rev(sb);
  2041. EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
  2042. EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
  2043. es->s_journal_inum = cpu_to_le32(journal_inum);
  2044. sb->s_dirt = 1;
  2045. /* Make sure we flush the recovery flag to disk. */
  2046. ext3_commit_super(sb, es, 1);
  2047. return 0;
  2048. }
  2049. static void ext3_commit_super (struct super_block * sb,
  2050. struct ext3_super_block * es,
  2051. int sync)
  2052. {
  2053. struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
  2054. if (!sbh)
  2055. return;
  2056. es->s_wtime = cpu_to_le32(get_seconds());
  2057. es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
  2058. es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
  2059. BUFFER_TRACE(sbh, "marking dirty");
  2060. mark_buffer_dirty(sbh);
  2061. if (sync)
  2062. sync_dirty_buffer(sbh);
  2063. }
  2064. /*
  2065. * Have we just finished recovery? If so, and if we are mounting (or
  2066. * remounting) the filesystem readonly, then we will end up with a
  2067. * consistent fs on disk. Record that fact.
  2068. */
  2069. static void ext3_mark_recovery_complete(struct super_block * sb,
  2070. struct ext3_super_block * es)
  2071. {
  2072. journal_t *journal = EXT3_SB(sb)->s_journal;
  2073. journal_lock_updates(journal);
  2074. if (journal_flush(journal) < 0)
  2075. goto out;
  2076. lock_super(sb);
  2077. if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
  2078. sb->s_flags & MS_RDONLY) {
  2079. EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
  2080. sb->s_dirt = 0;
  2081. ext3_commit_super(sb, es, 1);
  2082. }
  2083. unlock_super(sb);
  2084. out:
  2085. journal_unlock_updates(journal);
  2086. }
  2087. /*
  2088. * If we are mounting (or read-write remounting) a filesystem whose journal
  2089. * has recorded an error from a previous lifetime, move that error to the
  2090. * main filesystem now.
  2091. */
  2092. static void ext3_clear_journal_err(struct super_block * sb,
  2093. struct ext3_super_block * es)
  2094. {
  2095. journal_t *journal;
  2096. int j_errno;
  2097. const char *errstr;
  2098. journal = EXT3_SB(sb)->s_journal;
  2099. /*
  2100. * Now check for any error status which may have been recorded in the
  2101. * journal by a prior ext3_error() or ext3_abort()
  2102. */
  2103. j_errno = journal_errno(journal);
  2104. if (j_errno) {
  2105. char nbuf[16];
  2106. errstr = ext3_decode_error(sb, j_errno, nbuf);
  2107. ext3_warning(sb, __func__, "Filesystem error recorded "
  2108. "from previous mount: %s", errstr);
  2109. ext3_warning(sb, __func__, "Marking fs in need of "
  2110. "filesystem check.");
  2111. EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
  2112. es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
  2113. ext3_commit_super (sb, es, 1);
  2114. journal_clear_err(journal);
  2115. }
  2116. }
  2117. /*
  2118. * Force the running and committing transactions to commit,
  2119. * and wait on the commit.
  2120. */
  2121. int ext3_force_commit(struct super_block *sb)
  2122. {
  2123. journal_t *journal;
  2124. int ret;
  2125. if (sb->s_flags & MS_RDONLY)
  2126. return 0;
  2127. journal = EXT3_SB(sb)->s_journal;
  2128. sb->s_dirt = 0;
  2129. ret = ext3_journal_force_commit(journal);
  2130. return ret;
  2131. }
  2132. /*
  2133. * Ext3 always journals updates to the superblock itself, so we don't
  2134. * have to propagate any other updates to the superblock on disk at this
  2135. * point. (We can probably nuke this function altogether, and remove
  2136. * any mention to sb->s_dirt in all of fs/ext3; eventual cleanup...)
  2137. */
  2138. static void ext3_write_super (struct super_block * sb)
  2139. {
  2140. if (mutex_trylock(&sb->s_lock) != 0)
  2141. BUG();
  2142. sb->s_dirt = 0;
  2143. }
  2144. static int ext3_sync_fs(struct super_block *sb, int wait)
  2145. {
  2146. sb->s_dirt = 0;
  2147. if (wait)
  2148. ext3_force_commit(sb);
  2149. else
  2150. journal_start_commit(EXT3_SB(sb)->s_journal, NULL);
  2151. return 0;
  2152. }
  2153. /*
  2154. * LVM calls this function before a (read-only) snapshot is created. This
  2155. * gives us a chance to flush the journal completely and mark the fs clean.
  2156. */
  2157. static void ext3_write_super_lockfs(struct super_block *sb)
  2158. {
  2159. sb->s_dirt = 0;
  2160. if (!(sb->s_flags & MS_RDONLY)) {
  2161. journal_t *journal = EXT3_SB(sb)->s_journal;
  2162. /* Now we set up the journal barrier. */
  2163. journal_lock_updates(journal);
  2164. /*
  2165. * We don't want to clear needs_recovery flag when we failed
  2166. * to flush the journal.
  2167. */
  2168. if (journal_flush(journal) < 0)
  2169. return;
  2170. /* Journal blocked and flushed, clear needs_recovery flag. */
  2171. EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
  2172. ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
  2173. }
  2174. }
  2175. /*
  2176. * Called by LVM after the snapshot is done. We need to reset the RECOVER
  2177. * flag here, even though the filesystem is not technically dirty yet.
  2178. */
  2179. static void ext3_unlockfs(struct super_block *sb)
  2180. {
  2181. if (!(sb->s_flags & MS_RDONLY)) {
  2182. lock_super(sb);
  2183. /* Reser the needs_recovery flag before the fs is unlocked. */
  2184. EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
  2185. ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
  2186. unlock_super(sb);
  2187. journal_unlock_updates(EXT3_SB(sb)->s_journal);
  2188. }
  2189. }
  2190. static int ext3_remount (struct super_block * sb, int * flags, char * data)
  2191. {
  2192. struct ext3_super_block * es;
  2193. struct ext3_sb_info *sbi = EXT3_SB(sb);
  2194. ext3_fsblk_t n_blocks_count = 0;
  2195. unsigned long old_sb_flags;
  2196. struct ext3_mount_options old_opts;
  2197. int err;
  2198. #ifdef CONFIG_QUOTA
  2199. int i;
  2200. #endif
  2201. /* Store the original options */
  2202. old_sb_flags = sb->s_flags;
  2203. old_opts.s_mount_opt = sbi->s_mount_opt;
  2204. old_opts.s_resuid = sbi->s_resuid;
  2205. old_opts.s_resgid = sbi->s_resgid;
  2206. old_opts.s_commit_interval = sbi->s_commit_interval;
  2207. #ifdef CONFIG_QUOTA
  2208. old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
  2209. for (i = 0; i < MAXQUOTAS; i++)
  2210. old_opts.s_qf_names[i] = sbi->s_qf_names[i];
  2211. #endif
  2212. /*
  2213. * Allow the "check" option to be passed as a remount option.
  2214. */
  2215. if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
  2216. err = -EINVAL;
  2217. goto restore_opts;
  2218. }
  2219. if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
  2220. ext3_abort(sb, __func__, "Abort forced by user");
  2221. sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
  2222. ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
  2223. es = sbi->s_es;
  2224. ext3_init_journal_params(sb, sbi->s_journal);
  2225. if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
  2226. n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
  2227. if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) {
  2228. err = -EROFS;
  2229. goto restore_opts;
  2230. }
  2231. if (*flags & MS_RDONLY) {
  2232. /*
  2233. * First of all, the unconditional stuff we have to do
  2234. * to disable replay of the journal when we next remount
  2235. */
  2236. sb->s_flags |= MS_RDONLY;
  2237. /*
  2238. * OK, test if we are remounting a valid rw partition
  2239. * readonly, and if so set the rdonly flag and then
  2240. * mark the partition as valid again.
  2241. */
  2242. if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
  2243. (sbi->s_mount_state & EXT3_VALID_FS))
  2244. es->s_state = cpu_to_le16(sbi->s_mount_state);
  2245. /*
  2246. * We have to unlock super so that we can wait for
  2247. * transactions.
  2248. */
  2249. unlock_super(sb);
  2250. ext3_mark_recovery_complete(sb, es);
  2251. lock_super(sb);
  2252. } else {
  2253. __le32 ret;
  2254. if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
  2255. ~EXT3_FEATURE_RO_COMPAT_SUPP))) {
  2256. printk(KERN_WARNING "EXT3-fs: %s: couldn't "
  2257. "remount RDWR because of unsupported "
  2258. "optional features (%x).\n",
  2259. sb->s_id, le32_to_cpu(ret));
  2260. err = -EROFS;
  2261. goto restore_opts;
  2262. }
  2263. /*
  2264. * If we have an unprocessed orphan list hanging
  2265. * around from a previously readonly bdev mount,
  2266. * require a full umount/remount for now.
  2267. */
  2268. if (es->s_last_orphan) {
  2269. printk(KERN_WARNING "EXT3-fs: %s: couldn't "
  2270. "remount RDWR because of unprocessed "
  2271. "orphan inode list. Please "
  2272. "umount/remount instead.\n",
  2273. sb->s_id);
  2274. err = -EINVAL;
  2275. goto restore_opts;
  2276. }
  2277. /*
  2278. * Mounting a RDONLY partition read-write, so reread
  2279. * and store the current valid flag. (It may have
  2280. * been changed by e2fsck since we originally mounted
  2281. * the partition.)
  2282. */
  2283. ext3_clear_journal_err(sb, es);
  2284. sbi->s_mount_state = le16_to_cpu(es->s_state);
  2285. if ((err = ext3_group_extend(sb, es, n_blocks_count)))
  2286. goto restore_opts;
  2287. if (!ext3_setup_super (sb, es, 0))
  2288. sb->s_flags &= ~MS_RDONLY;
  2289. }
  2290. }
  2291. #ifdef CONFIG_QUOTA
  2292. /* Release old quota file names */
  2293. for (i = 0; i < MAXQUOTAS; i++)
  2294. if (old_opts.s_qf_names[i] &&
  2295. old_opts.s_qf_names[i] != sbi->s_qf_names[i])
  2296. kfree(old_opts.s_qf_names[i]);
  2297. #endif
  2298. return 0;
  2299. restore_opts:
  2300. sb->s_flags = old_sb_flags;
  2301. sbi->s_mount_opt = old_opts.s_mount_opt;
  2302. sbi->s_resuid = old_opts.s_resuid;
  2303. sbi->s_resgid = old_opts.s_resgid;
  2304. sbi->s_commit_interval = old_opts.s_commit_interval;
  2305. #ifdef CONFIG_QUOTA
  2306. sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
  2307. for (i = 0; i < MAXQUOTAS; i++) {
  2308. if (sbi->s_qf_names[i] &&
  2309. old_opts.s_qf_names[i] != sbi->s_qf_names[i])
  2310. kfree(sbi->s_qf_names[i]);
  2311. sbi->s_qf_names[i] = old_opts.s_qf_names[i];
  2312. }
  2313. #endif
  2314. return err;
  2315. }
  2316. static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
  2317. {
  2318. struct super_block *sb = dentry->d_sb;
  2319. struct ext3_sb_info *sbi = EXT3_SB(sb);
  2320. struct ext3_super_block *es = sbi->s_es;
  2321. u64 fsid;
  2322. if (test_opt(sb, MINIX_DF)) {
  2323. sbi->s_overhead_last = 0;
  2324. } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
  2325. unsigned long ngroups = sbi->s_groups_count, i;
  2326. ext3_fsblk_t overhead = 0;
  2327. smp_rmb();
  2328. /*
  2329. * Compute the overhead (FS structures). This is constant
  2330. * for a given filesystem unless the number of block groups
  2331. * changes so we cache the previous value until it does.
  2332. */
  2333. /*
  2334. * All of the blocks before first_data_block are
  2335. * overhead
  2336. */
  2337. overhead = le32_to_cpu(es->s_first_data_block);
  2338. /*
  2339. * Add the overhead attributed to the superblock and
  2340. * block group descriptors. If the sparse superblocks
  2341. * feature is turned on, then not all groups have this.
  2342. */
  2343. for (i = 0; i < ngroups; i++) {
  2344. overhead += ext3_bg_has_super(sb, i) +
  2345. ext3_bg_num_gdb(sb, i);
  2346. cond_resched();
  2347. }
  2348. /*
  2349. * Every block group has an inode bitmap, a block
  2350. * bitmap, and an inode table.
  2351. */
  2352. overhead += ngroups * (2 + sbi->s_itb_per_group);
  2353. sbi->s_overhead_last = overhead;
  2354. smp_wmb();
  2355. sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
  2356. }
  2357. buf->f_type = EXT3_SUPER_MAGIC;
  2358. buf->f_bsize = sb->s_blocksize;
  2359. buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
  2360. buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
  2361. es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
  2362. buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
  2363. if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
  2364. buf->f_bavail = 0;
  2365. buf->f_files = le32_to_cpu(es->s_inodes_count);
  2366. buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
  2367. es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
  2368. buf->f_namelen = EXT3_NAME_LEN;
  2369. fsid = le64_to_cpup((void *)es->s_uuid) ^
  2370. le64_to_cpup((void *)es->s_uuid + sizeof(u64));
  2371. buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
  2372. buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
  2373. return 0;
  2374. }
  /*
   * Helper function for writing quotas on sync - we need to start a
   * transaction before the quota file is locked for write. Otherwise there
   * are possible deadlocks:
   *
   * Process 1                         Process 2
   * ext3_create()                     quota_sync()
   *   journal_start()                   write_dquot()
   *   DQUOT_INIT()                        down(dqio_mutex)
   *     down(dqio_mutex)                    journal_start()
   *
   */
  2384. #ifdef CONFIG_QUOTA
  2385. static inline struct inode *dquot_to_inode(struct dquot *dquot)
  2386. {
  2387. return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
  2388. }
  2389. static int ext3_dquot_initialize(struct inode *inode, int type)
  2390. {
  2391. handle_t *handle;
  2392. int ret, err;
  2393. /* We may create quota structure so we need to reserve enough blocks */
  2394. handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
  2395. if (IS_ERR(handle))
  2396. return PTR_ERR(handle);
  2397. ret = dquot_initialize(inode, type);
  2398. err = ext3_journal_stop(handle);
  2399. if (!ret)
  2400. ret = err;
  2401. return ret;
  2402. }
  2403. static int ext3_dquot_drop(struct inode *inode)
  2404. {
  2405. handle_t *handle;
  2406. int ret, err;
  2407. /* We may delete quota structure so we need to reserve enough blocks */
  2408. handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
  2409. if (IS_ERR(handle)) {
  2410. /*
  2411. * We call dquot_drop() anyway to at least release references
  2412. * to quota structures so that umount does not hang.
  2413. */
  2414. dquot_drop(inode);
  2415. return PTR_ERR(handle);
  2416. }
  2417. ret = dquot_drop(inode);
  2418. err = ext3_journal_stop(handle);
  2419. if (!ret)
  2420. ret = err;
  2421. return ret;
  2422. }
  2423. static int ext3_write_dquot(struct dquot *dquot)
  2424. {
  2425. int ret, err;
  2426. handle_t *handle;
  2427. struct inode *inode;
  2428. inode = dquot_to_inode(dquot);
  2429. handle = ext3_journal_start(inode,
  2430. EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
  2431. if (IS_ERR(handle))
  2432. return PTR_ERR(handle);
  2433. ret = dquot_commit(dquot);
  2434. err = ext3_journal_stop(handle);
  2435. if (!ret)
  2436. ret = err;
  2437. return ret;
  2438. }
  2439. static int ext3_acquire_dquot(struct dquot *dquot)
  2440. {
  2441. int ret, err;
  2442. handle_t *handle;
  2443. handle = ext3_journal_start(dquot_to_inode(dquot),
  2444. EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
  2445. if (IS_ERR(handle))
  2446. return PTR_ERR(handle);
  2447. ret = dquot_acquire(dquot);
  2448. err = ext3_journal_stop(handle);
  2449. if (!ret)
  2450. ret = err;
  2451. return ret;
  2452. }
  2453. static int ext3_release_dquot(struct dquot *dquot)
  2454. {
  2455. int ret, err;
  2456. handle_t *handle;
  2457. handle = ext3_journal_start(dquot_to_inode(dquot),
  2458. EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
  2459. if (IS_ERR(handle)) {
  2460. /* Release dquot anyway to avoid endless cycle in dqput() */
  2461. dquot_release(dquot);
  2462. return PTR_ERR(handle);
  2463. }
  2464. ret = dquot_release(dquot);
  2465. err = ext3_journal_stop(handle);
  2466. if (!ret)
  2467. ret = err;
  2468. return ret;
  2469. }
  2470. static int ext3_mark_dquot_dirty(struct dquot *dquot)
  2471. {
  2472. /* Are we journaling quotas? */
  2473. if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
  2474. EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
  2475. dquot_mark_dquot_dirty(dquot);
  2476. return ext3_write_dquot(dquot);
  2477. } else {
  2478. return dquot_mark_dquot_dirty(dquot);
  2479. }
  2480. }
  2481. static int ext3_write_info(struct super_block *sb, int type)
  2482. {
  2483. int ret, err;
  2484. handle_t *handle;
  2485. /* Data block + inode block */
  2486. handle = ext3_journal_start(sb->s_root->d_inode, 2);
  2487. if (IS_ERR(handle))
  2488. return PTR_ERR(handle);
  2489. ret = dquot_commit_info(sb, type);
  2490. err = ext3_journal_stop(handle);
  2491. if (!ret)
  2492. ret = err;
  2493. return ret;
  2494. }
  2495. /*
  2496. * Turn on quotas during mount time - we need to find
  2497. * the quota file and such...
  2498. */
  2499. static int ext3_quota_on_mount(struct super_block *sb, int type)
  2500. {
  2501. return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
  2502. EXT3_SB(sb)->s_jquota_fmt, type);
  2503. }
  2504. /*
  2505. * Standard function to be called on quota_on
  2506. */
  2507. static int ext3_quota_on(struct super_block *sb, int type, int format_id,
  2508. char *name, int remount)
  2509. {
  2510. int err;
  2511. struct path path;
  2512. if (!test_opt(sb, QUOTA))
  2513. return -EINVAL;
  2514. /* When remounting, no checks are needed and in fact, name is NULL */
  2515. if (remount)
  2516. return vfs_quota_on(sb, type, format_id, name, remount);
  2517. err = kern_path(name, LOOKUP_FOLLOW, &path);
  2518. if (err)
  2519. return err;
  2520. /* Quotafile not on the same filesystem? */
  2521. if (path.mnt->mnt_sb != sb) {
  2522. path_put(&path);
  2523. return -EXDEV;
  2524. }
  2525. /* Journaling quota? */
  2526. if (EXT3_SB(sb)->s_qf_names[type]) {
  2527. /* Quotafile not of fs root? */
  2528. if (path.dentry->d_parent != sb->s_root)
  2529. printk(KERN_WARNING
  2530. "EXT3-fs: Quota file not on filesystem root. "
  2531. "Journaled quota will not work.\n");
  2532. }
  2533. /*
  2534. * When we journal data on quota file, we have to flush journal to see
  2535. * all updates to the file when we bypass pagecache...
  2536. */
  2537. if (ext3_should_journal_data(path.dentry->d_inode)) {
  2538. /*
  2539. * We don't need to lock updates but journal_flush() could
  2540. * otherwise be livelocked...
  2541. */
  2542. journal_lock_updates(EXT3_SB(sb)->s_journal);
  2543. err = journal_flush(EXT3_SB(sb)->s_journal);
  2544. journal_unlock_updates(EXT3_SB(sb)->s_journal);
  2545. if (err) {
  2546. path_put(&path);
  2547. return err;
  2548. }
  2549. }
  2550. err = vfs_quota_on_path(sb, type, format_id, &path);
  2551. path_put(&path);
  2552. return err;
  2553. }
  2554. /* Read data from quotafile - avoid pagecache and such because we cannot afford
  2555. * acquiring the locks... As quota files are never truncated and quota code
  2556. * itself serializes the operations (and noone else should touch the files)
  2557. * we don't have to be afraid of races */
  2558. static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
  2559. size_t len, loff_t off)
  2560. {
  2561. struct inode *inode = sb_dqopt(sb)->files[type];
  2562. sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
  2563. int err = 0;
  2564. int offset = off & (sb->s_blocksize - 1);
  2565. int tocopy;
  2566. size_t toread;
  2567. struct buffer_head *bh;
  2568. loff_t i_size = i_size_read(inode);
  2569. if (off > i_size)
  2570. return 0;
  2571. if (off+len > i_size)
  2572. len = i_size-off;
  2573. toread = len;
  2574. while (toread > 0) {
  2575. tocopy = sb->s_blocksize - offset < toread ?
  2576. sb->s_blocksize - offset : toread;
  2577. bh = ext3_bread(NULL, inode, blk, 0, &err);
  2578. if (err)
  2579. return err;
  2580. if (!bh) /* A hole? */
  2581. memset(data, 0, tocopy);
  2582. else
  2583. memcpy(data, bh->b_data+offset, tocopy);
  2584. brelse(bh);
  2585. offset = 0;
  2586. toread -= tocopy;
  2587. data += tocopy;
  2588. blk++;
  2589. }
  2590. return len;
  2591. }
  2592. /* Write to quotafile (we know the transaction is already started and has
  2593. * enough credits) */
  2594. static ssize_t ext3_quota_write(struct super_block *sb, int type,
  2595. const char *data, size_t len, loff_t off)
  2596. {
  2597. struct inode *inode = sb_dqopt(sb)->files[type];
  2598. sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
  2599. int err = 0;
  2600. int offset = off & (sb->s_blocksize - 1);
  2601. int tocopy;
  2602. int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
  2603. size_t towrite = len;
  2604. struct buffer_head *bh;
  2605. handle_t *handle = journal_current_handle();
  2606. if (!handle) {
  2607. printk(KERN_WARNING "EXT3-fs: Quota write (off=%Lu, len=%Lu)"
  2608. " cancelled because transaction is not started.\n",
  2609. (unsigned long long)off, (unsigned long long)len);
  2610. return -EIO;
  2611. }
  2612. mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
  2613. while (towrite > 0) {
  2614. tocopy = sb->s_blocksize - offset < towrite ?
  2615. sb->s_blocksize - offset : towrite;
  2616. bh = ext3_bread(handle, inode, blk, 1, &err);
  2617. if (!bh)
  2618. goto out;
  2619. if (journal_quota) {
  2620. err = ext3_journal_get_write_access(handle, bh);
  2621. if (err) {
  2622. brelse(bh);
  2623. goto out;
  2624. }
  2625. }
  2626. lock_buffer(bh);
  2627. memcpy(bh->b_data+offset, data, tocopy);
  2628. flush_dcache_page(bh->b_page);
  2629. unlock_buffer(bh);
  2630. if (journal_quota)
  2631. err = ext3_journal_dirty_metadata(handle, bh);
  2632. else {
  2633. /* Always do at least ordered writes for quotas */
  2634. err = ext3_journal_dirty_data(handle, bh);
  2635. mark_buffer_dirty(bh);
  2636. }
  2637. brelse(bh);
  2638. if (err)
  2639. goto out;
  2640. offset = 0;
  2641. towrite -= tocopy;
  2642. data += tocopy;
  2643. blk++;
  2644. }
  2645. out:
  2646. if (len == towrite) {
  2647. mutex_unlock(&inode->i_mutex);
  2648. return err;
  2649. }
  2650. if (inode->i_size < off+len-towrite) {
  2651. i_size_write(inode, off+len-towrite);
  2652. EXT3_I(inode)->i_disksize = inode->i_size;
  2653. }
  2654. inode->i_version++;
  2655. inode->i_mtime = inode->i_ctime = CURRENT_TIME;
  2656. ext3_mark_inode_dirty(handle, inode);
  2657. mutex_unlock(&inode->i_mutex);
  2658. return len - towrite;
  2659. }
  2660. #endif
  2661. static int ext3_get_sb(struct file_system_type *fs_type,
  2662. int flags, const char *dev_name, void *data, struct vfsmount *mnt)
  2663. {
  2664. return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
  2665. }
  2666. static struct file_system_type ext3_fs_type = {
  2667. .owner = THIS_MODULE,
  2668. .name = "ext3",
  2669. .get_sb = ext3_get_sb,
  2670. .kill_sb = kill_block_super,
  2671. .fs_flags = FS_REQUIRES_DEV,
  2672. };
  2673. static int __init init_ext3_fs(void)
  2674. {
  2675. int err = init_ext3_xattr();
  2676. if (err)
  2677. return err;
  2678. err = init_inodecache();
  2679. if (err)
  2680. goto out1;
  2681. err = register_filesystem(&ext3_fs_type);
  2682. if (err)
  2683. goto out;
  2684. return 0;
  2685. out:
  2686. destroy_inodecache();
  2687. out1:
  2688. exit_ext3_xattr();
  2689. return err;
  2690. }
  2691. static void __exit exit_ext3_fs(void)
  2692. {
  2693. unregister_filesystem(&ext3_fs_type);
  2694. destroy_inodecache();
  2695. exit_ext3_xattr();
  2696. }
  2697. MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
  2698. MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
  2699. MODULE_LICENSE("GPL");
  2700. module_init(init_ext3_fs)
  2701. module_exit(exit_ext3_fs)