  1. /**
  2. * aops.c - NTFS kernel address space operations and page cache handling.
  3. * Part of the Linux-NTFS project.
  4. *
  5. * Copyright (c) 2001-2005 Anton Altaparmakov
  6. * Copyright (c) 2002 Richard Russon
  7. *
  8. * This program/include file is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU General Public License as published
  10. * by the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program/include file is distributed in the hope that it will be
  14. * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
  15. * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program (in the main directory of the Linux-NTFS
  20. * distribution in the file COPYING); if not, write to the Free Software
  21. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  22. */
  23. #include <linux/errno.h>
  24. #include <linux/mm.h>
  25. #include <linux/pagemap.h>
  26. #include <linux/swap.h>
  27. #include <linux/buffer_head.h>
  28. #include <linux/writeback.h>
  29. #include "aops.h"
  30. #include "attrib.h"
  31. #include "debug.h"
  32. #include "inode.h"
  33. #include "mft.h"
  34. #include "runlist.h"
  35. #include "types.h"
  36. #include "ntfs.h"
  37. /**
  38. * ntfs_end_buffer_async_read - async io completion for reading attributes
  39. * @bh: buffer head on which io is completed
  40. * @uptodate: whether @bh is now uptodate or not
  41. *
  42. * Asynchronous I/O completion handler for reading pages belonging to the
  43. * attribute address space of an inode. The inodes can either be files or
  44. * directories or they can be fake inodes describing some attribute.
  45. *
  46. * If NInoMstProtected(), perform the post read mst fixups when all IO on the
  47. * page has been completed and mark the page uptodate or set the error bit on
  48. * the page. To determine the size of the records that need fixing up, we
  49. * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
  50. * record size and index_block_size_bits to the log(base 2) of the ntfs record
  51. * size (see the worked example below this comment).
  52. */
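/*
 * Worked example (the sizes are illustrative assumptions, not taken from this
 * file): assuming 4096-byte pages and 1024-byte mft records,
 * itype.index.block_size is set to 1024, so in the handler below
 * recs = 4096 / 1024 = 4 and post_read_mst_fixup() is applied at page offsets
 * 0x000, 0x400, 0x800 and 0xc00 once all buffers in the page have completed
 * their i/o.
 */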
  53. static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  54. {
  55. static DEFINE_SPINLOCK(page_uptodate_lock);
  56. unsigned long flags;
  57. struct buffer_head *tmp;
  58. struct page *page;
  59. ntfs_inode *ni;
  60. int page_uptodate = 1;
  61. page = bh->b_page;
  62. ni = NTFS_I(page->mapping->host);
  63. if (likely(uptodate)) {
  64. s64 file_ofs, initialized_size;
  65. set_buffer_uptodate(bh);
  66. file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
  67. bh_offset(bh);
  68. read_lock_irqsave(&ni->size_lock, flags);
  69. initialized_size = ni->initialized_size;
  70. read_unlock_irqrestore(&ni->size_lock, flags);
  71. /* Check for the current buffer head overflowing. */
  72. if (file_ofs + bh->b_size > initialized_size) {
  73. char *addr;
  74. int ofs = 0;
  75. if (file_ofs < initialized_size)
  76. ofs = initialized_size - file_ofs;
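/*
 * A minimal worked example, assuming 512-byte buffers: if this buffer
 * covers file offsets 1024-1535 but initialized_size is 1280, then
 * ofs = 1280 - 1024 = 256 and the memset() below clears bytes 256-511
 * of the buffer, i.e. the part lying beyond the initialized size.
 */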
  77. addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
  78. memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
  79. flush_dcache_page(page);
  80. kunmap_atomic(addr, KM_BIO_SRC_IRQ);
  81. }
  82. } else {
  83. clear_buffer_uptodate(bh);
  84. ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
  85. (unsigned long long)bh->b_blocknr);
  86. SetPageError(page);
  87. }
  88. spin_lock_irqsave(&page_uptodate_lock, flags);
  89. clear_buffer_async_read(bh);
  90. unlock_buffer(bh);
  91. tmp = bh;
  92. do {
  93. if (!buffer_uptodate(tmp))
  94. page_uptodate = 0;
  95. if (buffer_async_read(tmp)) {
  96. if (likely(buffer_locked(tmp)))
  97. goto still_busy;
  98. /* Async buffers must be locked. */
  99. BUG();
  100. }
  101. tmp = tmp->b_this_page;
  102. } while (tmp != bh);
  103. spin_unlock_irqrestore(&page_uptodate_lock, flags);
  104. /*
  105. * If none of the buffers had errors then we can set the page uptodate,
  106. * but we first have to perform the post read mst fixups, if the
  107. * attribute is mst protected, i.e. if NInoMstProtected(ni) is true.
  108. * Note we ignore fixup errors as those are detected when
  109. * map_mft_record() is called which gives us per record granularity
  110. * rather than per page granularity.
  111. */
  112. if (!NInoMstProtected(ni)) {
  113. if (likely(page_uptodate && !PageError(page)))
  114. SetPageUptodate(page);
  115. } else {
  116. char *addr;
  117. unsigned int i, recs;
  118. u32 rec_size;
  119. rec_size = ni->itype.index.block_size;
  120. recs = PAGE_CACHE_SIZE / rec_size;
  121. /* Should have been verified before we got here... */
  122. BUG_ON(!recs);
  123. addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
  124. for (i = 0; i < recs; i++)
  125. post_read_mst_fixup((NTFS_RECORD*)(addr +
  126. i * rec_size), rec_size);
  127. flush_dcache_page(page);
  128. kunmap_atomic(addr, KM_BIO_SRC_IRQ);
  129. if (likely(page_uptodate && !PageError(page)))
  130. SetPageUptodate(page);
  131. }
  132. unlock_page(page);
  133. return;
  134. still_busy:
  135. spin_unlock_irqrestore(&page_uptodate_lock, flags);
  136. return;
  137. }
  138. /**
  139. * ntfs_read_block - fill a @page of an address space with data
  140. * @page: page cache page to fill with data
  141. *
  142. * Fill the page @page of the address space belonging to the @page->mapping->host
  143. * inode. We read each buffer asynchronously and when all buffers are read in, our
  144. * io completion handler ntfs_end_buffer_async_read(), if required, automatically
  145. * applies the mst fixups to the page before finally marking it uptodate and
  146. * unlocking it.
  147. *
  148. * We only enforce the allocated_size limit because i_size is checked for in
  149. * generic_file_read().
  150. *
  151. * Return 0 on success and -errno on error.
  152. *
  153. * Contains an adapted version of fs/buffer.c::block_read_full_page().
  154. */
  155. static int ntfs_read_block(struct page *page)
  156. {
  157. VCN vcn;
  158. LCN lcn;
  159. ntfs_inode *ni;
  160. ntfs_volume *vol;
  161. runlist_element *rl;
  162. struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
  163. sector_t iblock, lblock, zblock;
  164. unsigned long flags;
  165. unsigned int blocksize, vcn_ofs;
  166. int i, nr;
  167. unsigned char blocksize_bits;
  168. ni = NTFS_I(page->mapping->host);
  169. vol = ni->vol;
  170. /* $MFT/$DATA must have its complete runlist in memory at all times. */
  171. BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
  172. blocksize_bits = VFS_I(ni)->i_blkbits;
  173. blocksize = 1 << blocksize_bits;
  174. if (!page_has_buffers(page)) {
  175. create_empty_buffers(page, blocksize, 0);
  176. if (unlikely(!page_has_buffers(page))) {
  177. unlock_page(page);
  178. return -ENOMEM;
  179. }
  180. }
  181. bh = head = page_buffers(page);
  182. BUG_ON(!bh);
  183. iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
  184. read_lock_irqsave(&ni->size_lock, flags);
  185. lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
  186. zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
  187. read_unlock_irqrestore(&ni->size_lock, flags);
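/*
 * lblock is the first block beyond the allocated size and zblock the first
 * block beyond the initialized size. For example, assuming 512-byte blocks
 * and an allocated_size of 3000 bytes, lblock = (3000 + 511) >> 9 = 6, so
 * any block with iblock >= 6 is never mapped and is simply zeroed below.
 */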
  188. /* Loop through all the buffers in the page. */
  189. rl = NULL;
  190. nr = i = 0;
  191. do {
  192. u8 *kaddr;
  193. int err;
  194. if (unlikely(buffer_uptodate(bh)))
  195. continue;
  196. if (unlikely(buffer_mapped(bh))) {
  197. arr[nr++] = bh;
  198. continue;
  199. }
  200. err = 0;
  201. bh->b_bdev = vol->sb->s_bdev;
  202. /* Is the block within the allowed limits? */
  203. if (iblock < lblock) {
  204. BOOL is_retry = FALSE;
  205. /* Convert iblock into corresponding vcn and offset. */
  206. vcn = (VCN)iblock << blocksize_bits >>
  207. vol->cluster_size_bits;
  208. vcn_ofs = ((VCN)iblock << blocksize_bits) &
  209. vol->cluster_size_mask;
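/*
 * Worked example with assumed geometry (512-byte blocks, i.e.
 * blocksize_bits = 9, and 4096-byte clusters, i.e. cluster_size_bits = 12):
 * iblock 10 is byte offset 5120 into the attribute, which gives
 * vcn = 5120 >> 12 = 1 and vcn_ofs = 5120 & 0xfff = 1024.
 */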
  210. if (!rl) {
  211. lock_retry_remap:
  212. down_read(&ni->runlist.lock);
  213. rl = ni->runlist.rl;
  214. }
  215. if (likely(rl != NULL)) {
  216. /* Seek to element containing target vcn. */
  217. while (rl->length && rl[1].vcn <= vcn)
  218. rl++;
  219. lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
  220. } else
  221. lcn = LCN_RL_NOT_MAPPED;
  222. /* Successful remap. */
  223. if (lcn >= 0) {
  224. /* Setup buffer head to correct block. */
  225. bh->b_blocknr = ((lcn << vol->cluster_size_bits)
  226. + vcn_ofs) >> blocksize_bits;
  227. set_buffer_mapped(bh);
  228. /* Only read initialized data blocks. */
  229. if (iblock < zblock) {
  230. arr[nr++] = bh;
  231. continue;
  232. }
  233. /* Fully non-initialized data block, zero it. */
  234. goto handle_zblock;
  235. }
  236. /* It is a hole, need to zero it. */
  237. if (lcn == LCN_HOLE)
  238. goto handle_hole;
  239. /* If first try and runlist unmapped, map and retry. */
  240. if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
  241. is_retry = TRUE;
  242. /*
  243. * Attempt to map runlist, dropping lock for
  244. * the duration.
  245. */
  246. up_read(&ni->runlist.lock);
  247. err = ntfs_map_runlist(ni, vcn);
  248. if (likely(!err))
  249. goto lock_retry_remap;
  250. rl = NULL;
  251. } else if (!rl)
  252. up_read(&ni->runlist.lock);
  253. /*
  254. * If buffer is outside the runlist, treat it as a
  255. * hole. This can happen due to concurrent truncate
  256. * for example.
  257. */
  258. if (err == -ENOENT || lcn == LCN_ENOENT) {
  259. err = 0;
  260. goto handle_hole;
  261. }
  262. /* Hard error, zero out region. */
  263. if (!err)
  264. err = -EIO;
  265. bh->b_blocknr = -1;
  266. SetPageError(page);
  267. ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
  268. "attribute type 0x%x, vcn 0x%llx, "
  269. "offset 0x%x because its location on "
  270. "disk could not be determined%s "
  271. "(error code %i).", ni->mft_no,
  272. ni->type, (unsigned long long)vcn,
  273. vcn_ofs, is_retry ? " even after "
  274. "retrying" : "", err);
  275. }
  276. /*
  277. * Either iblock was outside lblock limits or
  278. * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
  279. * of the page and set the buffer uptodate.
  280. */
  281. handle_hole:
  282. bh->b_blocknr = -1UL;
  283. clear_buffer_mapped(bh);
  284. handle_zblock:
  285. kaddr = kmap_atomic(page, KM_USER0);
  286. memset(kaddr + i * blocksize, 0, blocksize);
  287. kunmap_atomic(kaddr, KM_USER0);
  288. flush_dcache_page(page);
  289. if (likely(!err))
  290. set_buffer_uptodate(bh);
  291. } while (i++, iblock++, (bh = bh->b_this_page) != head);
  292. /* Release the lock if we took it. */
  293. if (rl)
  294. up_read(&ni->runlist.lock);
  295. /* Check we have at least one buffer ready for i/o. */
  296. if (nr) {
  297. struct buffer_head *tbh;
  298. /* Lock the buffers. */
  299. for (i = 0; i < nr; i++) {
  300. tbh = arr[i];
  301. lock_buffer(tbh);
  302. tbh->b_end_io = ntfs_end_buffer_async_read;
  303. set_buffer_async_read(tbh);
  304. }
  305. /* Finally, start i/o on the buffers. */
  306. for (i = 0; i < nr; i++) {
  307. tbh = arr[i];
  308. if (likely(!buffer_uptodate(tbh)))
  309. submit_bh(READ, tbh);
  310. else
  311. ntfs_end_buffer_async_read(tbh, 1);
  312. }
  313. return 0;
  314. }
  315. /* No i/o was scheduled on any of the buffers. */
  316. if (likely(!PageError(page)))
  317. SetPageUptodate(page);
  318. else /* Signal synchronous i/o error. */
  319. nr = -EIO;
  320. unlock_page(page);
  321. return nr;
  322. }
  323. /**
  324. * ntfs_readpage - fill a @page of a @file with data from the device
  325. * @file: open file to which the page @page belongs or NULL
  326. * @page: page cache page to fill with data
  327. *
  328. * For non-resident attributes, ntfs_readpage() fills the @page of the open
  329. * file @file by calling the ntfs version of the generic block_read_full_page()
  330. * function, ntfs_read_block(), which in turn creates and reads in the buffers
  331. * associated with the page asynchronously.
  332. *
  333. * For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
  334. * data from the mft record (which at this stage is most likely in memory) and
  335. * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
  336. * even if the mft record is not cached at this point in time, we need to wait
  337. * for it to be read in before we can do the copy.
  338. *
  339. * Return 0 on success and -errno on error.
  340. */
  341. static int ntfs_readpage(struct file *file, struct page *page)
  342. {
  343. ntfs_inode *ni, *base_ni;
  344. u8 *kaddr;
  345. ntfs_attr_search_ctx *ctx;
  346. MFT_RECORD *mrec;
  347. unsigned long flags;
  348. u32 attr_len;
  349. int err = 0;
  350. retry_readpage:
  351. BUG_ON(!PageLocked(page));
  352. /*
  353. * This can potentially happen because we clear PageUptodate() during
  354. * ntfs_writepage() of MstProtected() attributes.
  355. */
  356. if (PageUptodate(page)) {
  357. unlock_page(page);
  358. return 0;
  359. }
  360. ni = NTFS_I(page->mapping->host);
  361. /*
  362. * Only $DATA attributes can be encrypted and only unnamed $DATA
  363. * attributes can be compressed. Index root can have the flags set but
  364. * this means to create compressed/encrypted files, not that the
  365. * attribute is compressed/encrypted.
  366. */
  367. if (ni->type != AT_INDEX_ROOT) {
  368. /* If attribute is encrypted, deny access, just like NT4. */
  369. if (NInoEncrypted(ni)) {
  370. BUG_ON(ni->type != AT_DATA);
  371. err = -EACCES;
  372. goto err_out;
  373. }
  374. /* Compressed data streams are handled in compress.c. */
  375. if (NInoNonResident(ni) && NInoCompressed(ni)) {
  376. BUG_ON(ni->type != AT_DATA);
  377. BUG_ON(ni->name_len);
  378. return ntfs_read_compressed_block(page);
  379. }
  380. }
  381. /* NInoNonResident() == NInoIndexAllocPresent() */
  382. if (NInoNonResident(ni)) {
  383. /* Normal, non-resident data stream. */
  384. return ntfs_read_block(page);
  385. }
  386. /*
  387. * Attribute is resident, implying it is not compressed or encrypted.
  388. * This also means the attribute is smaller than an mft record and
  389. * hence smaller than a page, so can simply zero out any pages with
  390. * index above 0. Note the attribute can actually be marked compressed
  391. * but if it is resident the actual data is not compressed so we are
  392. * ok to ignore the compressed flag here.
  393. */
  394. if (unlikely(page->index > 0)) {
  395. kaddr = kmap_atomic(page, KM_USER0);
  396. memset(kaddr, 0, PAGE_CACHE_SIZE);
  397. flush_dcache_page(page);
  398. kunmap_atomic(kaddr, KM_USER0);
  399. goto done;
  400. }
  401. if (!NInoAttr(ni))
  402. base_ni = ni;
  403. else
  404. base_ni = ni->ext.base_ntfs_ino;
  405. /* Map, pin, and lock the mft record. */
  406. mrec = map_mft_record(base_ni);
  407. if (IS_ERR(mrec)) {
  408. err = PTR_ERR(mrec);
  409. goto err_out;
  410. }
  411. /*
  412. * If a parallel write made the attribute non-resident, drop the mft
  413. * record and retry the readpage.
  414. */
  415. if (unlikely(NInoNonResident(ni))) {
  416. unmap_mft_record(base_ni);
  417. goto retry_readpage;
  418. }
  419. ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
  420. if (unlikely(!ctx)) {
  421. err = -ENOMEM;
  422. goto unm_err_out;
  423. }
  424. err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
  425. CASE_SENSITIVE, 0, NULL, 0, ctx);
  426. if (unlikely(err))
  427. goto put_unm_err_out;
  428. attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
  429. read_lock_irqsave(&ni->size_lock, flags);
  430. if (unlikely(attr_len > ni->initialized_size))
  431. attr_len = ni->initialized_size;
  432. read_unlock_irqrestore(&ni->size_lock, flags);
  433. kaddr = kmap_atomic(page, KM_USER0);
  434. /* Copy the data to the page. */
  435. memcpy(kaddr, (u8*)ctx->attr +
  436. le16_to_cpu(ctx->attr->data.resident.value_offset),
  437. attr_len);
  438. /* Zero the remainder of the page. */
  439. memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
  440. flush_dcache_page(page);
  441. kunmap_atomic(kaddr, KM_USER0);
  442. put_unm_err_out:
  443. ntfs_attr_put_search_ctx(ctx);
  444. unm_err_out:
  445. unmap_mft_record(base_ni);
  446. done:
  447. SetPageUptodate(page);
  448. err_out:
  449. unlock_page(page);
  450. return err;
  451. }
  452. #ifdef NTFS_RW
  453. /**
  454. * ntfs_write_block - write a @page to the backing store
  455. * @page: page cache page to write out
  456. * @wbc: writeback control structure
  457. *
  458. * This function is for writing pages belonging to non-resident, non-mst
  459. * protected attributes to their backing store.
  460. *
  461. * For a page with buffers, map and write the dirty buffers asynchronously
  462. * under page writeback. For a page without buffers, create buffers for the
  463. * page, then proceed as above.
  464. *
  465. * If a page doesn't have buffers, the page dirty state is definitive. If a page
  466. * does have buffers, the page dirty state is just a hint, and the buffer dirty
  467. * state is definitive. (A hint which has rules: dirty buffers against a clean
  468. * page is illegal; other combinations are legal and need to be handled, in
  469. * particular a dirty page containing clean buffers.)
  470. *
  471. * Return 0 on success and -errno on error.
  472. *
  473. * Based on ntfs_read_block() and __block_write_full_page().
  474. */
  475. static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
  476. {
  477. VCN vcn;
  478. LCN lcn;
  479. s64 initialized_size;
  480. loff_t i_size;
  481. sector_t block, dblock, iblock;
  482. struct inode *vi;
  483. ntfs_inode *ni;
  484. ntfs_volume *vol;
  485. runlist_element *rl;
  486. struct buffer_head *bh, *head;
  487. unsigned long flags;
  488. unsigned int blocksize, vcn_ofs;
  489. int err;
  490. BOOL need_end_writeback;
  491. unsigned char blocksize_bits;
  492. vi = page->mapping->host;
  493. ni = NTFS_I(vi);
  494. vol = ni->vol;
  495. ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
  496. "0x%lx.", ni->mft_no, ni->type, page->index);
  497. BUG_ON(!NInoNonResident(ni));
  498. BUG_ON(NInoMstProtected(ni));
  499. blocksize_bits = vi->i_blkbits;
  500. blocksize = 1 << blocksize_bits;
  501. if (!page_has_buffers(page)) {
  502. BUG_ON(!PageUptodate(page));
  503. create_empty_buffers(page, blocksize,
  504. (1 << BH_Uptodate) | (1 << BH_Dirty));
  505. if (unlikely(!page_has_buffers(page))) {
  506. ntfs_warning(vol->sb, "Error allocating page "
  507. "buffers. Redirtying page so we try "
  508. "again later.");
  509. /*
  510. * Put the page back on mapping->dirty_pages, but leave
  511. * its buffers' dirty state as-is.
  512. */
  513. redirty_page_for_writepage(wbc, page);
  514. unlock_page(page);
  515. return 0;
  516. }
  517. }
  518. bh = head = page_buffers(page);
  519. BUG_ON(!bh);
  520. /* NOTE: Different naming scheme to ntfs_read_block()! */
  521. /* The first block in the page. */
  522. block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
  523. read_lock_irqsave(&ni->size_lock, flags);
  524. i_size = i_size_read(vi);
  525. initialized_size = ni->initialized_size;
  526. read_unlock_irqrestore(&ni->size_lock, flags);
  527. /* The first out of bounds block for the data size. */
  528. dblock = (i_size + blocksize - 1) >> blocksize_bits;
  529. /* The last (fully or partially) initialized block. */
  530. iblock = initialized_size >> blocksize_bits;
  531. /*
  532. * Be very careful. We have no exclusion from __set_page_dirty_buffers
  533. * here, and the (potentially unmapped) buffers may become dirty at
  534. * any time. If a buffer becomes dirty here after we've inspected it
  535. * then we just miss that fact, and the page stays dirty.
  536. *
  537. * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
  538. * handle that here by just cleaning them.
  539. */
  540. /*
  541. * Loop through all the buffers in the page, mapping all the dirty
  542. * buffers to disk addresses and handling any aliases from the
  543. * underlying block device's mapping.
  544. */
  545. rl = NULL;
  546. err = 0;
  547. do {
  548. BOOL is_retry = FALSE;
  549. if (unlikely(block >= dblock)) {
  550. /*
  551. * Mapped buffers outside i_size will occur, because
  552. * this page can be outside i_size when there is a
  553. * truncate in progress. The contents of such buffers
  554. * were zeroed by ntfs_writepage().
  555. *
  556. * FIXME: What about the small race window where
  557. * ntfs_writepage() has not done any clearing because
  558. * the page was within i_size but before we get here,
  559. * vmtruncate() modifies i_size?
  560. */
  561. clear_buffer_dirty(bh);
  562. set_buffer_uptodate(bh);
  563. continue;
  564. }
  565. /* Clean buffers are not written out, so no need to map them. */
  566. if (!buffer_dirty(bh))
  567. continue;
  568. /* Make sure we have enough initialized size. */
  569. if (unlikely((block >= iblock) &&
  570. (initialized_size < i_size))) {
  571. /*
  572. * If this page is fully outside initialized size, zero
  573. * out all pages between the current initialized size
  574. * and the current page. Just use ntfs_readpage() to do
  575. * the zeroing transparently.
  576. */
  577. if (block > iblock) {
  578. // TODO:
  579. // For each page do:
  580. // - read_cache_page()
  581. // Again for each page do:
  582. // - wait_on_page_locked()
  583. // - Check (PageUptodate(page) &&
  584. // !PageError(page))
  585. // Update initialized size in the attribute and
  586. // in the inode.
  587. // Again, for each page do:
  588. // __set_page_dirty_buffers();
  589. // page_cache_release()
  590. // We don't need to wait on the writes.
  591. // Update iblock.
  592. }
  593. /*
  594. * The current page straddles initialized size. Zero
  595. * all non-uptodate buffers and set them uptodate (and
  596. * dirty?). Note, there aren't any non-uptodate buffers
  597. * if the page is uptodate.
  598. * FIXME: For an uptodate page, the buffers may need to
  599. * be written out because they were not initialized on
  600. * disk before.
  601. */
  602. if (!PageUptodate(page)) {
  603. // TODO:
  604. // Zero any non-uptodate buffers up to i_size.
  605. // Set them uptodate and dirty.
  606. }
  607. // TODO:
  608. // Update initialized size in the attribute and in the
  609. // inode (up to i_size).
  610. // Update iblock.
  611. // FIXME: This is inefficient. Try to batch the two
  612. // size changes to happen in one go.
  613. ntfs_error(vol->sb, "Writing beyond initialized size "
  614. "is not supported yet. Sorry.");
  615. err = -EOPNOTSUPP;
  616. break;
  617. // Do NOT set_buffer_new() BUT DO clear buffer range
  618. // outside write request range.
  619. // set_buffer_uptodate() on complete buffers as well as
  620. // set_buffer_dirty().
  621. }
  622. /* No need to map buffers that are already mapped. */
  623. if (buffer_mapped(bh))
  624. continue;
  625. /* Unmapped, dirty buffer. Need to map it. */
  626. bh->b_bdev = vol->sb->s_bdev;
  627. /* Convert block into corresponding vcn and offset. */
  628. vcn = (VCN)block << blocksize_bits;
  629. vcn_ofs = vcn & vol->cluster_size_mask;
  630. vcn >>= vol->cluster_size_bits;
  631. if (!rl) {
  632. lock_retry_remap:
  633. down_read(&ni->runlist.lock);
  634. rl = ni->runlist.rl;
  635. }
  636. if (likely(rl != NULL)) {
  637. /* Seek to element containing target vcn. */
  638. while (rl->length && rl[1].vcn <= vcn)
  639. rl++;
  640. lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
  641. } else
  642. lcn = LCN_RL_NOT_MAPPED;
  643. /* Successful remap. */
  644. if (lcn >= 0) {
  645. /* Setup buffer head to point to correct block. */
  646. bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
  647. vcn_ofs) >> blocksize_bits;
  648. set_buffer_mapped(bh);
  649. continue;
  650. }
  651. /* It is a hole, need to instantiate it. */
  652. if (lcn == LCN_HOLE) {
  653. u8 *kaddr;
  654. unsigned long *bpos, *bend;
  655. /* Check if the buffer is zero. */
  656. kaddr = kmap_atomic(page, KM_USER0);
  657. bpos = (unsigned long *)(kaddr + bh_offset(bh));
  658. bend = (unsigned long *)((u8*)bpos + blocksize);
  659. do {
  660. if (unlikely(*bpos))
  661. break;
  662. } while (likely(++bpos < bend));
  663. kunmap_atomic(kaddr, KM_USER0);
  664. if (bpos == bend) {
  665. /*
  666. * Buffer is zero and sparse, no need to write
  667. * it.
  668. */
  669. bh->b_blocknr = -1;
  670. clear_buffer_dirty(bh);
  671. continue;
  672. }
  673. // TODO: Instantiate the hole.
  674. // clear_buffer_new(bh);
  675. // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
  676. ntfs_error(vol->sb, "Writing into sparse regions is "
  677. "not supported yet. Sorry.");
  678. err = -EOPNOTSUPP;
  679. break;
  680. }
  681. /* If first try and runlist unmapped, map and retry. */
  682. if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
  683. is_retry = TRUE;
  684. /*
  685. * Attempt to map runlist, dropping lock for
  686. * the duration.
  687. */
  688. up_read(&ni->runlist.lock);
  689. err = ntfs_map_runlist(ni, vcn);
  690. if (likely(!err))
  691. goto lock_retry_remap;
  692. rl = NULL;
  693. } else if (!rl)
  694. up_read(&ni->runlist.lock);
  695. /*
  696. * If buffer is outside the runlist, truncate has cut it out
  697. * of the runlist. Just clean and clear the buffer and set it
  698. * uptodate so it can get discarded by the VM.
  699. */
  700. if (err == -ENOENT || lcn == LCN_ENOENT) {
  701. u8 *kaddr;
  702. bh->b_blocknr = -1;
  703. clear_buffer_dirty(bh);
  704. kaddr = kmap_atomic(page, KM_USER0);
  705. memset(kaddr + bh_offset(bh), 0, blocksize);
  706. kunmap_atomic(kaddr, KM_USER0);
  707. flush_dcache_page(page);
  708. set_buffer_uptodate(bh);
  709. err = 0;
  710. continue;
  711. }
  712. /* Failed to map the buffer, even after retrying. */
  713. if (!err)
  714. err = -EIO;
  715. bh->b_blocknr = -1;
  716. ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
  717. "attribute type 0x%x, vcn 0x%llx, offset 0x%x "
  718. "because its location on disk could not be "
  719. "determined%s (error code %i).", ni->mft_no,
  720. ni->type, (unsigned long long)vcn,
  721. vcn_ofs, is_retry ? " even after "
  722. "retrying" : "", err);
  723. break;
  724. } while (block++, (bh = bh->b_this_page) != head);
  725. /* Release the lock if we took it. */
  726. if (rl)
  727. up_read(&ni->runlist.lock);
  728. /* For the error case, need to reset bh to the beginning. */
  729. bh = head;
  730. /* Just an optimization, so ->readpage() is not called later. */
  731. if (unlikely(!PageUptodate(page))) {
  732. int uptodate = 1;
  733. do {
  734. if (!buffer_uptodate(bh)) {
  735. uptodate = 0;
  736. bh = head;
  737. break;
  738. }
  739. } while ((bh = bh->b_this_page) != head);
  740. if (uptodate)
  741. SetPageUptodate(page);
  742. }
  743. /* Setup all mapped, dirty buffers for async write i/o. */
  744. do {
  745. if (buffer_mapped(bh) && buffer_dirty(bh)) {
  746. lock_buffer(bh);
  747. if (test_clear_buffer_dirty(bh)) {
  748. BUG_ON(!buffer_uptodate(bh));
  749. mark_buffer_async_write(bh);
  750. } else
  751. unlock_buffer(bh);
  752. } else if (unlikely(err)) {
  753. /*
  754. * For the error case. The buffer may have been set
  755. * dirty during attachment to a dirty page.
  756. */
  757. if (err != -ENOMEM)
  758. clear_buffer_dirty(bh);
  759. }
  760. } while ((bh = bh->b_this_page) != head);
  761. if (unlikely(err)) {
  762. // TODO: Remove the -EOPNOTSUPP check later on...
  763. if (unlikely(err == -EOPNOTSUPP))
  764. err = 0;
  765. else if (err == -ENOMEM) {
  766. ntfs_warning(vol->sb, "Error allocating memory. "
  767. "Redirtying page so we try again "
  768. "later.");
  769. /*
  770. * Put the page back on mapping->dirty_pages, but
  771. * leave its buffer's dirty state as-is.
  772. */
  773. redirty_page_for_writepage(wbc, page);
  774. err = 0;
  775. } else
  776. SetPageError(page);
  777. }
  778. BUG_ON(PageWriteback(page));
  779. set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
  780. /* Submit the prepared buffers for i/o. */
  781. need_end_writeback = TRUE;
  782. do {
  783. struct buffer_head *next = bh->b_this_page;
  784. if (buffer_async_write(bh)) {
  785. submit_bh(WRITE, bh);
  786. need_end_writeback = FALSE;
  787. }
  788. bh = next;
  789. } while (bh != head);
  790. unlock_page(page);
  791. /* If no i/o was started, need to end_page_writeback(). */
  792. if (unlikely(need_end_writeback))
  793. end_page_writeback(page);
  794. ntfs_debug("Done.");
  795. return err;
  796. }
  797. /**
  798. * ntfs_write_mst_block - write a @page to the backing store
  799. * @page: page cache page to write out
  800. * @wbc: writeback control structure
  801. *
  802. * This function is for writing pages belonging to non-resident, mst protected
  803. * attributes to their backing store. The only supported attributes are index
  804. * allocation and $MFT/$DATA. Both directory inodes and index inodes are
  805. * supported for the index allocation case.
  806. *
  807. * The page must remain locked for the duration of the write because we apply
  808. * the mst fixups, write, and then undo the fixups, so if we were to unlock the
  809. * page before undoing the fixups, any other user of the page will see the
  810. * page contents as corrupt.
  811. *
  812. * We clear the page uptodate flag for the duration of the function to ensure
  813. * exclusion for the $MFT/$DATA case against someone mapping an mft record we
  814. * are about to apply the mst fixups to.
  815. *
  816. * Return 0 on success and -errno on error.
  817. *
  818. * Based on ntfs_write_block(), ntfs_mft_writepage(), and
  819. * write_mft_record_nolock().
  820. */
  821. static int ntfs_write_mst_block(struct page *page,
  822. struct writeback_control *wbc)
  823. {
  824. sector_t block, dblock, rec_block;
  825. struct inode *vi = page->mapping->host;
  826. ntfs_inode *ni = NTFS_I(vi);
  827. ntfs_volume *vol = ni->vol;
  828. u8 *kaddr;
  829. unsigned int rec_size = ni->itype.index.block_size;
  830. ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
  831. struct buffer_head *bh, *head, *tbh, *rec_start_bh;
  832. struct buffer_head *bhs[MAX_BUF_PER_PAGE];
  833. runlist_element *rl;
  834. int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
  835. unsigned bh_size, rec_size_bits;
  836. BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
  837. unsigned char bh_size_bits;
  838. ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
  839. "0x%lx.", vi->i_ino, ni->type, page->index);
  840. BUG_ON(!NInoNonResident(ni));
  841. BUG_ON(!NInoMstProtected(ni));
  842. is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
  843. /*
  844. * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
  845. * in its page cache were to be marked dirty. However, this should
  846. * never happen with the current driver, and since we do not handle
  847. * this case here, we do want to BUG(), at least for now.
  848. */
  849. BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
  850. (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
  851. bh_size_bits = vi->i_blkbits;
  852. bh_size = 1 << bh_size_bits;
  853. max_bhs = PAGE_CACHE_SIZE / bh_size;
  854. BUG_ON(!max_bhs);
  855. BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
  856. /* Were we called for sync purposes? */
  857. sync = (wbc->sync_mode == WB_SYNC_ALL);
  858. /* Make sure we have mapped buffers. */
  859. bh = head = page_buffers(page);
  860. BUG_ON(!bh);
  861. rec_size_bits = ni->itype.index.block_size_bits;
  862. BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
  863. bhs_per_rec = rec_size >> bh_size_bits;
  864. BUG_ON(!bhs_per_rec);
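/*
 * Example with assumed sizes (4096-byte pages, 512-byte buffer heads and
 * 1024-byte ntfs records): max_bhs = 8 and bhs_per_rec = 2, so the page
 * holds four records of two buffers each and rec_block below advances by
 * two blocks each time a new record starts.
 */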
  865. /* The first block in the page. */
  866. rec_block = block = (sector_t)page->index <<
  867. (PAGE_CACHE_SHIFT - bh_size_bits);
  868. /* The first out of bounds block for the data size. */
  869. dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
  870. rl = NULL;
  871. err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
  872. page_is_dirty = rec_is_dirty = FALSE;
  873. rec_start_bh = NULL;
  874. do {
  875. BOOL is_retry = FALSE;
  876. if (likely(block < rec_block)) {
  877. if (unlikely(block >= dblock)) {
  878. clear_buffer_dirty(bh);
  879. set_buffer_uptodate(bh);
  880. continue;
  881. }
  882. /*
  883. * This block is not the first one in the record. We
  884. * ignore the buffer's dirty state because we could
  885. * have raced with a parallel mark_ntfs_record_dirty().
  886. */
  887. if (!rec_is_dirty)
  888. continue;
  889. if (unlikely(err2)) {
  890. if (err2 != -ENOMEM)
  891. clear_buffer_dirty(bh);
  892. continue;
  893. }
  894. } else /* if (block == rec_block) */ {
  895. BUG_ON(block > rec_block);
  896. /* This block is the first one in the record. */
  897. rec_block += bhs_per_rec;
  898. err2 = 0;
  899. if (unlikely(block >= dblock)) {
  900. clear_buffer_dirty(bh);
  901. continue;
  902. }
  903. if (!buffer_dirty(bh)) {
  904. /* Clean records are not written out. */
  905. rec_is_dirty = FALSE;
  906. continue;
  907. }
  908. rec_is_dirty = TRUE;
  909. rec_start_bh = bh;
  910. }
  911. /* Need to map the buffer if it is not mapped already. */
  912. if (unlikely(!buffer_mapped(bh))) {
  913. VCN vcn;
  914. LCN lcn;
  915. unsigned int vcn_ofs;
  916. bh->b_bdev = vol->sb->s_bdev;
  917. /* Obtain the vcn and offset of the current block. */
  918. vcn = (VCN)block << bh_size_bits;
  919. vcn_ofs = vcn & vol->cluster_size_mask;
  920. vcn >>= vol->cluster_size_bits;
  921. if (!rl) {
  922. lock_retry_remap:
  923. down_read(&ni->runlist.lock);
  924. rl = ni->runlist.rl;
  925. }
  926. if (likely(rl != NULL)) {
  927. /* Seek to element containing target vcn. */
  928. while (rl->length && rl[1].vcn <= vcn)
  929. rl++;
  930. lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
  931. } else
  932. lcn = LCN_RL_NOT_MAPPED;
  933. /* Successful remap. */
  934. if (likely(lcn >= 0)) {
  935. /* Setup buffer head to correct block. */
  936. bh->b_blocknr = ((lcn <<
  937. vol->cluster_size_bits) +
  938. vcn_ofs) >> bh_size_bits;
  939. set_buffer_mapped(bh);
  940. } else {
  941. /*
  942. * Remap failed. Retry to map the runlist once
  943. * unless we are working on $MFT which always
  944. * has the whole of its runlist in memory.
  945. */
  946. if (!is_mft && !is_retry &&
  947. lcn == LCN_RL_NOT_MAPPED) {
  948. is_retry = TRUE;
  949. /*
  950. * Attempt to map runlist, dropping
  951. * lock for the duration.
  952. */
  953. up_read(&ni->runlist.lock);
  954. err2 = ntfs_map_runlist(ni, vcn);
  955. if (likely(!err2))
  956. goto lock_retry_remap;
  957. if (err2 == -ENOMEM)
  958. page_is_dirty = TRUE;
  959. lcn = err2;
  960. } else {
  961. err2 = -EIO;
  962. if (!rl)
  963. up_read(&ni->runlist.lock);
  964. }
  965. /* Hard error. Abort writing this record. */
  966. if (!err || err == -ENOMEM)
  967. err = err2;
  968. bh->b_blocknr = -1;
  969. ntfs_error(vol->sb, "Cannot write ntfs record "
  970. "0x%llx (inode 0x%lx, "
  971. "attribute type 0x%x) because "
  972. "its location on disk could "
  973. "not be determined (error "
  974. "code %lli).",
  975. (long long)block <<
  976. bh_size_bits >>
  977. vol->mft_record_size_bits,
  978. ni->mft_no, ni->type,
  979. (long long)lcn);
  980. /*
  981. * If this is not the first buffer, remove the
  982. * buffers in this record from the list of
  983. * buffers to write and clear their dirty bit
  984. * if not error -ENOMEM.
  985. */
  986. if (rec_start_bh != bh) {
  987. while (bhs[--nr_bhs] != rec_start_bh)
  988. ;
  989. if (err2 != -ENOMEM) {
  990. do {
  991. clear_buffer_dirty(
  992. rec_start_bh);
  993. } while ((rec_start_bh =
  994. rec_start_bh->
  995. b_this_page) !=
  996. bh);
  997. }
  998. }
  999. continue;
  1000. }
  1001. }
  1002. BUG_ON(!buffer_uptodate(bh));
  1003. BUG_ON(nr_bhs >= max_bhs);
  1004. bhs[nr_bhs++] = bh;
  1005. } while (block++, (bh = bh->b_this_page) != head);
  1006. if (unlikely(rl))
  1007. up_read(&ni->runlist.lock);
  1008. /* If there were no dirty buffers, we are done. */
  1009. if (!nr_bhs)
  1010. goto done;
  1011. /* Map the page so we can access its contents. */
  1012. kaddr = kmap(page);
  1013. /* Clear the page uptodate flag whilst the mst fixups are applied. */
  1014. BUG_ON(!PageUptodate(page));
  1015. ClearPageUptodate(page);
  1016. for (i = 0; i < nr_bhs; i++) {
  1017. unsigned int ofs;
  1018. /* Skip buffers which are not at the beginning of records. */
  1019. if (i % bhs_per_rec)
  1020. continue;
  1021. tbh = bhs[i];
  1022. ofs = bh_offset(tbh);
  1023. if (is_mft) {
  1024. ntfs_inode *tni;
  1025. unsigned long mft_no;
  1026. /* Get the mft record number. */
  1027. mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
  1028. >> rec_size_bits;
  1029. /* Check whether to write this mft record. */
  1030. tni = NULL;
  1031. if (!ntfs_may_write_mft_record(vol, mft_no,
  1032. (MFT_RECORD*)(kaddr + ofs), &tni)) {
  1033. /*
  1034. * The record should not be written. This
  1035. * means we need to redirty the page before
  1036. * returning.
  1037. */
  1038. page_is_dirty = TRUE;
  1039. /*
  1040. * Remove the buffers in this mft record from
  1041. * the list of buffers to write.
  1042. */
  1043. do {
  1044. bhs[i] = NULL;
  1045. } while (++i % bhs_per_rec);
  1046. continue;
  1047. }
  1048. /*
  1049. * The record should be written. If a locked ntfs
  1050. * inode was returned, add it to the array of locked
  1051. * ntfs inodes.
  1052. */
  1053. if (tni)
  1054. locked_nis[nr_locked_nis++] = tni;
  1055. }
  1056. /* Apply the mst protection fixups. */
  1057. err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
  1058. rec_size);
  1059. if (unlikely(err2)) {
  1060. if (!err || err == -ENOMEM)
  1061. err = -EIO;
  1062. ntfs_error(vol->sb, "Failed to apply mst fixups "
  1063. "(inode 0x%lx, attribute type 0x%x, "
  1064. "page index 0x%lx, page offset 0x%x)!"
  1065. " Unmount and run chkdsk.", vi->i_ino,
  1066. ni->type, page->index, ofs);
  1067. /*
  1068. * Mark all the buffers in this record clean as we do
  1069. * not want to write corrupt data to disk.
  1070. */
  1071. do {
  1072. clear_buffer_dirty(bhs[i]);
  1073. bhs[i] = NULL;
  1074. } while (++i % bhs_per_rec);
  1075. continue;
  1076. }
  1077. nr_recs++;
  1078. }
  1079. /* If no records are to be written out, we are done. */
  1080. if (!nr_recs)
  1081. goto unm_done;
  1082. flush_dcache_page(page);
  1083. /* Lock buffers and start synchronous write i/o on them. */
  1084. for (i = 0; i < nr_bhs; i++) {
  1085. tbh = bhs[i];
  1086. if (!tbh)
  1087. continue;
  1088. if (unlikely(test_set_buffer_locked(tbh)))
  1089. BUG();
  1090. /* The buffer dirty state is now irrelevant, just clean it. */
  1091. clear_buffer_dirty(tbh);
  1092. BUG_ON(!buffer_uptodate(tbh));
  1093. BUG_ON(!buffer_mapped(tbh));
  1094. get_bh(tbh);
  1095. tbh->b_end_io = end_buffer_write_sync;
  1096. submit_bh(WRITE, tbh);
  1097. }
  1098. /* Synchronize the mft mirror now if not @sync. */
  1099. if (is_mft && !sync)
  1100. goto do_mirror;
  1101. do_wait:
  1102. /* Wait on i/o completion of buffers. */
  1103. for (i = 0; i < nr_bhs; i++) {
  1104. tbh = bhs[i];
  1105. if (!tbh)
  1106. continue;
  1107. wait_on_buffer(tbh);
  1108. if (unlikely(!buffer_uptodate(tbh))) {
  1109. ntfs_error(vol->sb, "I/O error while writing ntfs "
  1110. "record buffer (inode 0x%lx, "
  1111. "attribute type 0x%x, page index "
  1112. "0x%lx, page offset 0x%lx)! Unmount "
  1113. "and run chkdsk.", vi->i_ino, ni->type,
  1114. page->index, bh_offset(tbh));
  1115. if (!err || err == -ENOMEM)
  1116. err = -EIO;
  1117. /*
  1118. * Set the buffer uptodate so the page and buffer
  1119. * states do not become out of sync.
  1120. */
  1121. set_buffer_uptodate(tbh);
  1122. }
  1123. }
  1124. /* If @sync, now synchronize the mft mirror. */
  1125. if (is_mft && sync) {
  1126. do_mirror:
  1127. for (i = 0; i < nr_bhs; i++) {
  1128. unsigned long mft_no;
  1129. unsigned int ofs;
  1130. /*
  1131. * Skip buffers which are not at the beginning of
  1132. * records.
  1133. */
  1134. if (i % bhs_per_rec)
  1135. continue;
  1136. tbh = bhs[i];
  1137. /* Skip removed buffers (and hence records). */
  1138. if (!tbh)
  1139. continue;
  1140. ofs = bh_offset(tbh);
  1141. /* Get the mft record number. */
  1142. mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
  1143. >> rec_size_bits;
  1144. if (mft_no < vol->mftmirr_size)
  1145. ntfs_sync_mft_mirror(vol, mft_no,
  1146. (MFT_RECORD*)(kaddr + ofs),
  1147. sync);
  1148. }
  1149. if (!sync)
  1150. goto do_wait;
  1151. }
  1152. /* Remove the mst protection fixups again. */
  1153. for (i = 0; i < nr_bhs; i++) {
  1154. if (!(i % bhs_per_rec)) {
  1155. tbh = bhs[i];
  1156. if (!tbh)
  1157. continue;
  1158. post_write_mst_fixup((NTFS_RECORD*)(kaddr +
  1159. bh_offset(tbh)));
  1160. }
  1161. }
  1162. flush_dcache_page(page);
  1163. unm_done:
  1164. /* Unlock any locked inodes. */
  1165. while (nr_locked_nis-- > 0) {
  1166. ntfs_inode *tni, *base_tni;
  1167. tni = locked_nis[nr_locked_nis];
  1168. /* Get the base inode. */
  1169. down(&tni->extent_lock);
  1170. if (tni->nr_extents >= 0)
  1171. base_tni = tni;
  1172. else {
  1173. base_tni = tni->ext.base_ntfs_ino;
  1174. BUG_ON(!base_tni);
  1175. }
  1176. up(&tni->extent_lock);
  1177. ntfs_debug("Unlocking %s inode 0x%lx.",
  1178. tni == base_tni ? "base" : "extent",
  1179. tni->mft_no);
  1180. up(&tni->mrec_lock);
  1181. atomic_dec(&tni->count);
  1182. iput(VFS_I(base_tni));
  1183. }
  1184. SetPageUptodate(page);
  1185. kunmap(page);
  1186. done:
  1187. if (unlikely(err && err != -ENOMEM)) {
  1188. /*
  1189. * Set page error if there is only one ntfs record in the page.
  1190. * Otherwise we would lose per-record granularity.
  1191. */
  1192. if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
  1193. SetPageError(page);
  1194. NVolSetErrors(vol);
  1195. }
  1196. if (page_is_dirty) {
  1197. ntfs_debug("Page still contains one or more dirty ntfs "
  1198. "records. Redirtying the page starting at "
  1199. "record 0x%lx.", page->index <<
  1200. (PAGE_CACHE_SHIFT - rec_size_bits));
  1201. redirty_page_for_writepage(wbc, page);
  1202. unlock_page(page);
  1203. } else {
  1204. /*
  1205. * Keep the VM happy. This must be done otherwise the
  1206. * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
  1207. * the page is clean.
  1208. */
  1209. BUG_ON(PageWriteback(page));
  1210. set_page_writeback(page);
  1211. unlock_page(page);
  1212. end_page_writeback(page);
  1213. }
  1214. if (likely(!err))
  1215. ntfs_debug("Done.");
  1216. return err;
  1217. }
  1218. /**
  1219. * ntfs_writepage - write a @page to the backing store
  1220. * @page: page cache page to write out
  1221. * @wbc: writeback control structure
  1222. *
  1223. * This is called from the VM when it wants to have a dirty ntfs page cache
  1224. * page cleaned. The VM has already locked the page and marked it clean.
  1225. *
  1226. * For non-resident attributes, ntfs_writepage() writes the @page by calling
  1227. * the ntfs version of the generic block_write_full_page() function,
  1228. * ntfs_write_block(), which in turn if necessary creates and writes the
  1229. * buffers associated with the page asynchronously.
  1230. *
  1231. * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
  1232. * the data to the mft record (which at this stage is most likely in memory).
  1233. * The mft record is then marked dirty and written out asynchronously via the
  1234. * vfs inode dirty code path for the inode the mft record belongs to or via the
  1235. * vm page dirty code path for the page the mft record is in.
  1236. *
  1237. * Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
  1238. *
  1239. * Return 0 on success and -errno on error.
  1240. */
  1241. static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
  1242. {
  1243. loff_t i_size;
  1244. struct inode *vi = page->mapping->host;
  1245. ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
  1246. char *kaddr;
  1247. ntfs_attr_search_ctx *ctx = NULL;
  1248. MFT_RECORD *m = NULL;
  1249. u32 attr_len;
  1250. int err;
  1251. retry_writepage:
  1252. BUG_ON(!PageLocked(page));
  1253. i_size = i_size_read(vi);
  1254. /* Is the page fully outside i_size? (truncate in progress) */
  1255. if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
  1256. PAGE_CACHE_SHIFT)) {
  1257. /*
  1258. * The page may have dirty, unmapped buffers. Make them
  1259. * freeable here, so the page does not leak.
  1260. */
  1261. block_invalidatepage(page, 0);
  1262. unlock_page(page);
  1263. ntfs_debug("Write outside i_size - truncated?");
  1264. return 0;
  1265. }
  1266. /*
  1267. * Only $DATA attributes can be encrypted and only unnamed $DATA
  1268. * attributes can be compressed. Index root can have the flags set but
  1269. * this means to create compressed/encrypted files, not that the
  1270. * attribute is compressed/encrypted.
  1271. */
  1272. if (ni->type != AT_INDEX_ROOT) {
  1273. /* If file is encrypted, deny access, just like NT4. */
  1274. if (NInoEncrypted(ni)) {
  1275. unlock_page(page);
  1276. BUG_ON(ni->type != AT_DATA);
  1277. ntfs_debug("Denying write access to encrypted "
  1278. "file.");
  1279. return -EACCES;
  1280. }
  1281. /* Compressed data streams are handled in compress.c. */
  1282. if (NInoNonResident(ni) && NInoCompressed(ni)) {
  1283. BUG_ON(ni->type != AT_DATA);
  1284. BUG_ON(ni->name_len);
  1285. // TODO: Implement and replace this with
  1286. // return ntfs_write_compressed_block(page);
  1287. unlock_page(page);
  1288. ntfs_error(vi->i_sb, "Writing to compressed files is "
  1289. "not supported yet. Sorry.");
  1290. return -EOPNOTSUPP;
  1291. }
  1292. // TODO: Implement and remove this check.
  1293. if (NInoNonResident(ni) && NInoSparse(ni)) {
  1294. unlock_page(page);
  1295. ntfs_error(vi->i_sb, "Writing to sparse files is not "
  1296. "supported yet. Sorry.");
  1297. return -EOPNOTSUPP;
  1298. }
  1299. }
  1300. /* NInoNonResident() == NInoIndexAllocPresent() */
  1301. if (NInoNonResident(ni)) {
  1302. /* We have to zero every time due to mmap-at-end-of-file. */
  1303. if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
  1304. /* The page straddles i_size. */
  1305. unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
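/*
 * A small worked example, assuming 4096-byte pages: for an i_size of
 * 0x1234, the page with index 1 straddles i_size, ofs = 0x1234 & 0xfff =
 * 0x234, and the memset() below zeroes bytes 0x234-0xfff of the page so
 * that data mmap()ed beyond the end of file is not written to disk.
 */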
  1306. kaddr = kmap_atomic(page, KM_USER0);
  1307. memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
  1308. flush_dcache_page(page);
  1309. kunmap_atomic(kaddr, KM_USER0);
  1310. }
  1311. /* Handle mst protected attributes. */
  1312. if (NInoMstProtected(ni))
  1313. return ntfs_write_mst_block(page, wbc);
  1314. /* Normal, non-resident data stream. */
  1315. return ntfs_write_block(page, wbc);
  1316. }
  1317. /*
  1318. * Attribute is resident, implying it is not compressed, encrypted, or
  1319. * mst protected. This also means the attribute is smaller than an mft
  1320. * record and hence smaller than a page, so can simply return error on
  1321. * any pages with index above 0. Note the attribute can actually be
  1322. * marked compressed but if it is resident the actual data is not
  1323. * compressed so we are ok to ignore the compressed flag here.
  1324. */
  1325. BUG_ON(page_has_buffers(page));
  1326. BUG_ON(!PageUptodate(page));
  1327. if (unlikely(page->index > 0)) {
  1328. ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
  1329. "Aborting write.", page->index);
  1330. BUG_ON(PageWriteback(page));
  1331. set_page_writeback(page);
  1332. unlock_page(page);
  1333. end_page_writeback(page);
  1334. return -EIO;
  1335. }
  1336. if (!NInoAttr(ni))
  1337. base_ni = ni;
  1338. else
  1339. base_ni = ni->ext.base_ntfs_ino;
  1340. /* Map, pin, and lock the mft record. */
  1341. m = map_mft_record(base_ni);
  1342. if (IS_ERR(m)) {
  1343. err = PTR_ERR(m);
  1344. m = NULL;
  1345. ctx = NULL;
  1346. goto err_out;
  1347. }
  1348. /*
  1349. * If a parallel write made the attribute non-resident, drop the mft
  1350. * record and retry the writepage.
  1351. */
  1352. if (unlikely(NInoNonResident(ni))) {
  1353. unmap_mft_record(base_ni);
  1354. goto retry_writepage;
  1355. }
  1356. ctx = ntfs_attr_get_search_ctx(base_ni, m);
  1357. if (unlikely(!ctx)) {
  1358. err = -ENOMEM;
  1359. goto err_out;
  1360. }
  1361. err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
  1362. CASE_SENSITIVE, 0, NULL, 0, ctx);
  1363. if (unlikely(err))
  1364. goto err_out;
  1365. /*
  1366. * Keep the VM happy. This must be done otherwise the radix-tree tag
  1367. * PAGECACHE_TAG_DIRTY remains set even though the page is clean.
  1368. */
  1369. BUG_ON(PageWriteback(page));
  1370. set_page_writeback(page);
  1371. unlock_page(page);
  1372. /*
1373. * Here, we do not need to zero the out of bounds area every time
1374. * because the below memcpy() already takes care of the
1375. * mmap-at-end-of-file requirements. If the file is converted to a
1376. * non-resident one, then the code path in use is switched to the
  1377. * non-resident one where the zeroing happens on each ntfs_writepage()
  1378. * invocation.
  1379. */
  1380. attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
  1381. i_size = i_size_read(vi);
  1382. if (unlikely(attr_len > i_size)) {
  1383. attr_len = i_size;
  1384. ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
  1385. }
  1386. kaddr = kmap_atomic(page, KM_USER0);
  1387. /* Copy the data from the page to the mft record. */
  1388. memcpy((u8*)ctx->attr +
  1389. le16_to_cpu(ctx->attr->data.resident.value_offset),
  1390. kaddr, attr_len);
  1391. flush_dcache_mft_record_page(ctx->ntfs_ino);
  1392. /* Zero out of bounds area in the page cache page. */
  1393. memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
  1394. flush_dcache_page(page);
  1395. kunmap_atomic(kaddr, KM_USER0);
  1396. end_page_writeback(page);
  1397. /* Mark the mft record dirty, so it gets written back. */
  1398. mark_mft_record_dirty(ctx->ntfs_ino);
  1399. ntfs_attr_put_search_ctx(ctx);
  1400. unmap_mft_record(base_ni);
  1401. return 0;
  1402. err_out:
  1403. if (err == -ENOMEM) {
  1404. ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
  1405. "page so we try again later.");
  1406. /*
  1407. * Put the page back on mapping->dirty_pages, but leave its
  1408. * buffers' dirty state as-is.
  1409. */
  1410. redirty_page_for_writepage(wbc, page);
  1411. err = 0;
  1412. } else {
  1413. ntfs_error(vi->i_sb, "Resident attribute write failed with "
  1414. "error %i.", err);
  1415. SetPageError(page);
  1416. NVolSetErrors(ni->vol);
  1417. make_bad_inode(vi);
  1418. }
  1419. unlock_page(page);
  1420. if (ctx)
  1421. ntfs_attr_put_search_ctx(ctx);
  1422. if (m)
  1423. unmap_mft_record(base_ni);
  1424. return err;
  1425. }
  1426. /**
1427. * ntfs_prepare_nonresident_write - prepare a non-resident attribute page for a write
  1428. *
  1429. */
  1430. static int ntfs_prepare_nonresident_write(struct page *page,
  1431. unsigned from, unsigned to)
  1432. {
  1433. VCN vcn;
  1434. LCN lcn;
  1435. s64 initialized_size;
  1436. loff_t i_size;
  1437. sector_t block, ablock, iblock;
  1438. struct inode *vi;
  1439. ntfs_inode *ni;
  1440. ntfs_volume *vol;
  1441. runlist_element *rl;
  1442. struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
  1443. unsigned long flags;
  1444. unsigned int vcn_ofs, block_start, block_end, blocksize;
  1445. int err;
  1446. BOOL is_retry;
  1447. unsigned char blocksize_bits;
  1448. vi = page->mapping->host;
  1449. ni = NTFS_I(vi);
  1450. vol = ni->vol;
  1451. ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
  1452. "0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
  1453. page->index, from, to);
  1454. BUG_ON(!NInoNonResident(ni));
  1455. blocksize_bits = vi->i_blkbits;
  1456. blocksize = 1 << blocksize_bits;
  1457. /*
  1458. * create_empty_buffers() will create uptodate/dirty buffers if the
  1459. * page is uptodate/dirty.
  1460. */
  1461. if (!page_has_buffers(page))
  1462. create_empty_buffers(page, blocksize, 0);
  1463. bh = head = page_buffers(page);
  1464. if (unlikely(!bh))
  1465. return -ENOMEM;
  1466. /* The first block in the page. */
  1467. block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
  1468. read_lock_irqsave(&ni->size_lock, flags);
  1469. /*
  1470. * The first out of bounds block for the allocated size. No need to
  1471. * round up as allocated_size is in multiples of cluster size and the
  1472. * minimum cluster size is 512 bytes, which is equal to the smallest
  1473. * blocksize.
  1474. */
  1475. ablock = ni->allocated_size >> blocksize_bits;
  1476. i_size = i_size_read(vi);
  1477. initialized_size = ni->initialized_size;
  1478. read_unlock_irqrestore(&ni->size_lock, flags);
  1479. /* The last (fully or partially) initialized block. */
  1480. iblock = initialized_size >> blocksize_bits;
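/*
 * Example: with 512 byte blocks, allocated_size == 8192 gives
 * ablock == 16 (the first block outside the allocation), and
 * initialized_size == 5000 gives iblock == 9 (the last, partially
 * initialized block).
 */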
  1481. /* Loop through all the buffers in the page. */
  1482. block_start = 0;
  1483. rl = NULL;
  1484. err = 0;
  1485. do {
  1486. block_end = block_start + blocksize;
  1487. /*
  1488. * If buffer @bh is outside the write, just mark it uptodate
  1489. * if the page is uptodate and continue with the next buffer.
  1490. */
  1491. if (block_end <= from || block_start >= to) {
  1492. if (PageUptodate(page)) {
  1493. if (!buffer_uptodate(bh))
  1494. set_buffer_uptodate(bh);
  1495. }
  1496. continue;
  1497. }
  1498. /*
  1499. * @bh is at least partially being written to.
  1500. * Make sure it is not marked as new.
  1501. */
  1502. //if (buffer_new(bh))
  1503. // clear_buffer_new(bh);
  1504. if (block >= ablock) {
  1505. // TODO: block is above allocated_size, need to
  1506. // allocate it. Best done in one go to accommodate not
  1507. // only block but all above blocks up to and including:
  1508. // ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
1509. // - 1) >> blocksize_bits. Obviously will need to round
  1510. // up to next cluster boundary, too. This should be
  1511. // done with a helper function, so it can be reused.
  1512. ntfs_error(vol->sb, "Writing beyond allocated size "
  1513. "is not supported yet. Sorry.");
  1514. err = -EOPNOTSUPP;
  1515. goto err_out;
  1516. // Need to update ablock.
  1517. // Need to set_buffer_new() on all block bhs that are
  1518. // newly allocated.
  1519. }
  1520. /*
  1521. * Now we have enough allocated size to fulfill the whole
  1522. * request, i.e. block < ablock is true.
  1523. */
  1524. if (unlikely((block >= iblock) &&
  1525. (initialized_size < i_size))) {
  1526. /*
  1527. * If this page is fully outside initialized size, zero
  1528. * out all pages between the current initialized size
  1529. * and the current page. Just use ntfs_readpage() to do
  1530. * the zeroing transparently.
  1531. */
  1532. if (block > iblock) {
  1533. // TODO:
  1534. // For each page do:
  1535. // - read_cache_page()
  1536. // Again for each page do:
  1537. // - wait_on_page_locked()
  1538. // - Check (PageUptodate(page) &&
  1539. // !PageError(page))
  1540. // Update initialized size in the attribute and
  1541. // in the inode.
  1542. // Again, for each page do:
  1543. // __set_page_dirty_buffers();
  1544. // page_cache_release()
  1545. // We don't need to wait on the writes.
  1546. // Update iblock.
  1547. }
  1548. /*
  1549. * The current page straddles initialized size. Zero
  1550. * all non-uptodate buffers and set them uptodate (and
  1551. * dirty?). Note, there aren't any non-uptodate buffers
  1552. * if the page is uptodate.
  1553. * FIXME: For an uptodate page, the buffers may need to
  1554. * be written out because they were not initialized on
  1555. * disk before.
  1556. */
  1557. if (!PageUptodate(page)) {
  1558. // TODO:
  1559. // Zero any non-uptodate buffers up to i_size.
  1560. // Set them uptodate and dirty.
  1561. }
  1562. // TODO:
  1563. // Update initialized size in the attribute and in the
  1564. // inode (up to i_size).
  1565. // Update iblock.
  1566. // FIXME: This is inefficient. Try to batch the two
  1567. // size changes to happen in one go.
  1568. ntfs_error(vol->sb, "Writing beyond initialized size "
  1569. "is not supported yet. Sorry.");
  1570. err = -EOPNOTSUPP;
  1571. goto err_out;
  1572. // Do NOT set_buffer_new() BUT DO clear buffer range
  1573. // outside write request range.
  1574. // set_buffer_uptodate() on complete buffers as well as
  1575. // set_buffer_dirty().
  1576. }
  1577. /* Need to map unmapped buffers. */
  1578. if (!buffer_mapped(bh)) {
  1579. /* Unmapped buffer. Need to map it. */
  1580. bh->b_bdev = vol->sb->s_bdev;
  1581. /* Convert block into corresponding vcn and offset. */
  1582. vcn = (VCN)block << blocksize_bits >>
  1583. vol->cluster_size_bits;
  1584. vcn_ofs = ((VCN)block << blocksize_bits) &
  1585. vol->cluster_size_mask;
  1586. is_retry = FALSE;
  1587. if (!rl) {
  1588. lock_retry_remap:
  1589. down_read(&ni->runlist.lock);
  1590. rl = ni->runlist.rl;
  1591. }
  1592. if (likely(rl != NULL)) {
  1593. /* Seek to element containing target vcn. */
  1594. while (rl->length && rl[1].vcn <= vcn)
  1595. rl++;
  1596. lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
  1597. } else
  1598. lcn = LCN_RL_NOT_MAPPED;
  1599. if (unlikely(lcn < 0)) {
  1600. /*
  1601. * We extended the attribute allocation above.
  1602. * If we hit an ENOENT here it means that the
  1603. * allocation was insufficient which is a bug.
  1604. */
  1605. BUG_ON(lcn == LCN_ENOENT);
  1606. /* It is a hole, need to instantiate it. */
  1607. if (lcn == LCN_HOLE) {
  1608. // TODO: Instantiate the hole.
  1609. // clear_buffer_new(bh);
  1610. // unmap_underlying_metadata(bh->b_bdev,
  1611. // bh->b_blocknr);
  1612. // For non-uptodate buffers, need to
  1613. // zero out the region outside the
  1614. // request in this bh or all bhs,
  1615. // depending on what we implemented
  1616. // above.
  1617. // Need to flush_dcache_page().
  1618. // Or could use set_buffer_new()
  1619. // instead?
  1620. ntfs_error(vol->sb, "Writing into "
  1621. "sparse regions is "
  1622. "not supported yet. "
  1623. "Sorry.");
  1624. err = -EOPNOTSUPP;
  1625. if (!rl)
  1626. up_read(&ni->runlist.lock);
  1627. goto err_out;
  1628. } else if (!is_retry &&
  1629. lcn == LCN_RL_NOT_MAPPED) {
  1630. is_retry = TRUE;
  1631. /*
  1632. * Attempt to map runlist, dropping
  1633. * lock for the duration.
  1634. */
  1635. up_read(&ni->runlist.lock);
  1636. err = ntfs_map_runlist(ni, vcn);
  1637. if (likely(!err))
  1638. goto lock_retry_remap;
  1639. rl = NULL;
  1640. lcn = err;
  1641. } else if (!rl)
  1642. up_read(&ni->runlist.lock);
  1643. /*
  1644. * Failed to map the buffer, even after
  1645. * retrying.
  1646. */
  1647. bh->b_blocknr = -1;
  1648. ntfs_error(vol->sb, "Failed to write to inode "
  1649. "0x%lx, attribute type 0x%x, "
  1650. "vcn 0x%llx, offset 0x%x "
  1651. "because its location on disk "
  1652. "could not be determined%s "
  1653. "(error code %lli).",
  1654. ni->mft_no, ni->type,
  1655. (unsigned long long)vcn,
  1656. vcn_ofs, is_retry ? " even "
  1657. "after retrying" : "",
  1658. (long long)lcn);
  1659. if (!err)
  1660. err = -EIO;
  1661. goto err_out;
  1662. }
  1663. /* We now have a successful remap, i.e. lcn >= 0. */
  1664. /* Setup buffer head to correct block. */
  1665. bh->b_blocknr = ((lcn << vol->cluster_size_bits)
  1666. + vcn_ofs) >> blocksize_bits;
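/*
 * Example: with 512 byte blocks (blocksize_bits == 9) and 4096 byte
 * clusters (cluster_size_bits == 12), block 19 yields vcn == 2 and
 * vcn_ofs == 1536; if that vcn maps to lcn == 100, the buffer is
 * mapped to device block ((100 << 12) + 1536) >> 9 == 803.
 */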
  1667. set_buffer_mapped(bh);
  1668. // FIXME: Something analogous to this is needed for
  1669. // each newly allocated block, i.e. BH_New.
  1670. // FIXME: Might need to take this out of the
  1671. // if (!buffer_mapped(bh)) {}, depending on how we
  1672. // implement things during the allocated_size and
  1673. // initialized_size extension code above.
  1674. if (buffer_new(bh)) {
  1675. clear_buffer_new(bh);
  1676. unmap_underlying_metadata(bh->b_bdev,
  1677. bh->b_blocknr);
  1678. if (PageUptodate(page)) {
  1679. set_buffer_uptodate(bh);
  1680. continue;
  1681. }
  1682. /*
  1683. * Page is _not_ uptodate, zero surrounding
1684. * region. NOTE: This is how we decide whether
1685. * to zero or not!
  1686. */
  1687. if (block_end > to || block_start < from) {
  1688. void *kaddr;
  1689. kaddr = kmap_atomic(page, KM_USER0);
  1690. if (block_end > to)
  1691. memset(kaddr + to, 0,
  1692. block_end - to);
  1693. if (block_start < from)
  1694. memset(kaddr + block_start, 0,
  1695. from -
  1696. block_start);
  1697. flush_dcache_page(page);
  1698. kunmap_atomic(kaddr, KM_USER0);
  1699. }
  1700. continue;
  1701. }
  1702. }
  1703. /* @bh is mapped, set it uptodate if the page is uptodate. */
  1704. if (PageUptodate(page)) {
  1705. if (!buffer_uptodate(bh))
  1706. set_buffer_uptodate(bh);
  1707. continue;
  1708. }
  1709. /*
  1710. * The page is not uptodate. The buffer is mapped. If it is not
  1711. * uptodate, and it is only partially being written to, we need
  1712. * to read the buffer in before the write, i.e. right now.
  1713. */
  1714. if (!buffer_uptodate(bh) &&
  1715. (block_start < from || block_end > to)) {
  1716. ll_rw_block(READ, 1, &bh);
  1717. *wait_bh++ = bh;
  1718. }
  1719. } while (block++, block_start = block_end,
  1720. (bh = bh->b_this_page) != head);
  1721. /* Release the lock if we took it. */
  1722. if (rl) {
  1723. up_read(&ni->runlist.lock);
  1724. rl = NULL;
  1725. }
  1726. /* If we issued read requests, let them complete. */
  1727. while (wait_bh > wait) {
  1728. wait_on_buffer(*--wait_bh);
  1729. if (!buffer_uptodate(*wait_bh))
  1730. return -EIO;
  1731. }
  1732. ntfs_debug("Done.");
  1733. return 0;
  1734. err_out:
  1735. /*
  1736. * Zero out any newly allocated blocks to avoid exposing stale data.
  1737. * If BH_New is set, we know that the block was newly allocated in the
  1738. * above loop.
  1739. * FIXME: What about initialized_size increments? Have we done all the
  1740. * required zeroing above? If not this error handling is broken, and
  1741. * in particular the if (block_end <= from) check is completely bogus.
  1742. */
  1743. bh = head;
  1744. block_start = 0;
  1745. is_retry = FALSE;
  1746. do {
  1747. block_end = block_start + blocksize;
  1748. if (block_end <= from)
  1749. continue;
  1750. if (block_start >= to)
  1751. break;
  1752. if (buffer_new(bh)) {
  1753. void *kaddr;
  1754. clear_buffer_new(bh);
  1755. kaddr = kmap_atomic(page, KM_USER0);
  1756. memset(kaddr + block_start, 0, bh->b_size);
  1757. kunmap_atomic(kaddr, KM_USER0);
  1758. set_buffer_uptodate(bh);
  1759. mark_buffer_dirty(bh);
  1760. is_retry = TRUE;
  1761. }
  1762. } while (block_start = block_end, (bh = bh->b_this_page) != head);
  1763. if (is_retry)
  1764. flush_dcache_page(page);
  1765. if (rl)
  1766. up_read(&ni->runlist.lock);
  1767. return err;
  1768. }
  1769. /**
  1770. * ntfs_prepare_write - prepare a page for receiving data
  1771. *
  1772. * This is called from generic_file_write() with i_sem held on the inode
  1773. * (@page->mapping->host). The @page is locked but not kmap()ped. The source
  1774. * data has not yet been copied into the @page.
  1775. *
  1776. * Need to extend the attribute/fill in holes if necessary, create blocks and
1777. * make partially overwritten blocks uptodate.
  1778. *
  1779. * i_size is not to be modified yet.
  1780. *
  1781. * Return 0 on success or -errno on error.
  1782. *
  1783. * Should be using block_prepare_write() [support for sparse files] or
  1784. * cont_prepare_write() [no support for sparse files]. Cannot do that due to
  1785. * ntfs specifics but can look at them for implementation guidance.
  1786. *
  1787. * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
  1788. * the first byte in the page that will be written to and @to is the first byte
  1789. * after the last byte that will be written to.
  1790. */
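/*
 * Example of the above: a write of 100 bytes starting at byte offset 10
 * within the page arrives here with from == 10 and to == 110.
 */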
  1791. static int ntfs_prepare_write(struct file *file, struct page *page,
  1792. unsigned from, unsigned to)
  1793. {
  1794. s64 new_size;
  1795. loff_t i_size;
  1796. struct inode *vi = page->mapping->host;
  1797. ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
  1798. ntfs_volume *vol = ni->vol;
  1799. ntfs_attr_search_ctx *ctx = NULL;
  1800. MFT_RECORD *m = NULL;
  1801. ATTR_RECORD *a;
  1802. u8 *kaddr;
  1803. u32 attr_len;
  1804. int err;
  1805. ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
  1806. "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
  1807. page->index, from, to);
  1808. BUG_ON(!PageLocked(page));
  1809. BUG_ON(from > PAGE_CACHE_SIZE);
  1810. BUG_ON(to > PAGE_CACHE_SIZE);
  1811. BUG_ON(from > to);
  1812. BUG_ON(NInoMstProtected(ni));
  1813. /*
  1814. * If a previous ntfs_truncate() failed, repeat it and abort if it
  1815. * fails again.
  1816. */
  1817. if (unlikely(NInoTruncateFailed(ni))) {
  1818. down_write(&vi->i_alloc_sem);
  1819. err = ntfs_truncate(vi);
  1820. up_write(&vi->i_alloc_sem);
  1821. if (err || NInoTruncateFailed(ni)) {
  1822. if (!err)
  1823. err = -EIO;
  1824. goto err_out;
  1825. }
  1826. }
  1827. /* If the attribute is not resident, deal with it elsewhere. */
  1828. if (NInoNonResident(ni)) {
  1829. /*
  1830. * Only unnamed $DATA attributes can be compressed, encrypted,
  1831. * and/or sparse.
  1832. */
  1833. if (ni->type == AT_DATA && !ni->name_len) {
  1834. /* If file is encrypted, deny access, just like NT4. */
  1835. if (NInoEncrypted(ni)) {
  1836. ntfs_debug("Denying write access to encrypted "
  1837. "file.");
  1838. return -EACCES;
  1839. }
  1840. /* Compressed data streams are handled in compress.c. */
  1841. if (NInoCompressed(ni)) {
  1842. // TODO: Implement and replace this check with
  1843. // return ntfs_write_compressed_block(page);
  1844. ntfs_error(vi->i_sb, "Writing to compressed "
  1845. "files is not supported yet. "
  1846. "Sorry.");
  1847. return -EOPNOTSUPP;
  1848. }
  1849. // TODO: Implement and remove this check.
  1850. if (NInoSparse(ni)) {
  1851. ntfs_error(vi->i_sb, "Writing to sparse files "
  1852. "is not supported yet. Sorry.");
  1853. return -EOPNOTSUPP;
  1854. }
  1855. }
  1856. /* Normal data stream. */
  1857. return ntfs_prepare_nonresident_write(page, from, to);
  1858. }
  1859. /*
  1860. * Attribute is resident, implying it is not compressed, encrypted, or
  1861. * sparse.
  1862. */
  1863. BUG_ON(page_has_buffers(page));
  1864. new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
  1865. /* If we do not need to resize the attribute allocation we are done. */
  1866. if (new_size <= i_size_read(vi))
  1867. goto done;
  1868. /* Map, pin, and lock the (base) mft record. */
  1869. if (!NInoAttr(ni))
  1870. base_ni = ni;
  1871. else
  1872. base_ni = ni->ext.base_ntfs_ino;
  1873. m = map_mft_record(base_ni);
  1874. if (IS_ERR(m)) {
  1875. err = PTR_ERR(m);
  1876. m = NULL;
  1877. ctx = NULL;
  1878. goto err_out;
  1879. }
  1880. ctx = ntfs_attr_get_search_ctx(base_ni, m);
  1881. if (unlikely(!ctx)) {
  1882. err = -ENOMEM;
  1883. goto err_out;
  1884. }
  1885. err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
  1886. CASE_SENSITIVE, 0, NULL, 0, ctx);
  1887. if (unlikely(err)) {
  1888. if (err == -ENOENT)
  1889. err = -EIO;
  1890. goto err_out;
  1891. }
  1892. m = ctx->mrec;
  1893. a = ctx->attr;
  1894. /* The total length of the attribute value. */
  1895. attr_len = le32_to_cpu(a->data.resident.value_length);
1896. /* Fix a possible earlier failure of ntfs_commit_write(). */
  1897. i_size = i_size_read(vi);
  1898. if (unlikely(attr_len > i_size)) {
  1899. attr_len = i_size;
  1900. a->data.resident.value_length = cpu_to_le32(attr_len);
  1901. }
  1902. /* If we do not need to resize the attribute allocation we are done. */
  1903. if (new_size <= attr_len)
  1904. goto done_unm;
  1905. /* Check if new size is allowed in $AttrDef. */
  1906. err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
  1907. if (unlikely(err)) {
  1908. if (err == -ERANGE) {
  1909. ntfs_error(vol->sb, "Write would cause the inode "
  1910. "0x%lx to exceed the maximum size for "
  1911. "its attribute type (0x%x). Aborting "
  1912. "write.", vi->i_ino,
  1913. le32_to_cpu(ni->type));
  1914. } else {
  1915. ntfs_error(vol->sb, "Inode 0x%lx has unknown "
  1916. "attribute type 0x%x. Aborting "
  1917. "write.", vi->i_ino,
  1918. le32_to_cpu(ni->type));
  1919. err = -EIO;
  1920. }
  1921. goto err_out2;
  1922. }
  1923. /*
  1924. * Extend the attribute record to be able to store the new attribute
  1925. * size.
  1926. */
  1927. if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a,
  1928. le16_to_cpu(a->data.resident.value_offset) +
  1929. new_size)) {
  1930. /* Not enough space in the mft record. */
  1931. ntfs_error(vol->sb, "Not enough space in the mft record for "
  1932. "the resized attribute value. This is not "
  1933. "supported yet. Aborting write.");
  1934. err = -EOPNOTSUPP;
  1935. goto err_out2;
  1936. }
  1937. /*
  1938. * We have enough space in the mft record to fit the write. This
  1939. * implies the attribute is smaller than the mft record and hence the
  1940. * attribute must be in a single page and hence page->index must be 0.
  1941. */
  1942. BUG_ON(page->index);
  1943. /*
  1944. * If the beginning of the write is past the old size, enlarge the
  1945. * attribute value up to the beginning of the write and fill it with
  1946. * zeroes.
  1947. */
  1948. if (from > attr_len) {
  1949. memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
  1950. attr_len, 0, from - attr_len);
  1951. a->data.resident.value_length = cpu_to_le32(from);
  1952. /* Zero the corresponding area in the page as well. */
  1953. if (PageUptodate(page)) {
  1954. kaddr = kmap_atomic(page, KM_USER0);
  1955. memset(kaddr + attr_len, 0, from - attr_len);
  1956. kunmap_atomic(kaddr, KM_USER0);
  1957. flush_dcache_page(page);
  1958. }
  1959. }
  1960. flush_dcache_mft_record_page(ctx->ntfs_ino);
  1961. mark_mft_record_dirty(ctx->ntfs_ino);
  1962. done_unm:
  1963. ntfs_attr_put_search_ctx(ctx);
  1964. unmap_mft_record(base_ni);
  1965. /*
  1966. * Because resident attributes are handled by memcpy() to/from the
  1967. * corresponding MFT record, and because this form of i/o is byte
  1968. * aligned rather than block aligned, there is no need to bring the
  1969. * page uptodate here as in the non-resident case where we need to
  1970. * bring the buffers straddled by the write uptodate before
  1971. * generic_file_write() does the copying from userspace.
  1972. *
1973. * We thus defer bringing the page region outside the written-to region
1974. * uptodate until ntfs_commit_write(), which makes the code
1975. * simpler and saves one atomic kmap which is good.
  1976. */
  1977. done:
  1978. ntfs_debug("Done.");
  1979. return 0;
  1980. err_out:
  1981. if (err == -ENOMEM)
  1982. ntfs_warning(vi->i_sb, "Error allocating memory required to "
  1983. "prepare the write.");
  1984. else {
  1985. ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
  1986. "with error %i.", err);
  1987. NVolSetErrors(vol);
  1988. make_bad_inode(vi);
  1989. }
  1990. err_out2:
  1991. if (ctx)
  1992. ntfs_attr_put_search_ctx(ctx);
  1993. if (m)
  1994. unmap_mft_record(base_ni);
  1995. return err;
  1996. }
  1997. /**
1998. * ntfs_commit_nonresident_write - commit a write to a non-resident attribute page
  1999. *
  2000. */
  2001. static int ntfs_commit_nonresident_write(struct page *page,
  2002. unsigned from, unsigned to)
  2003. {
  2004. s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
  2005. struct inode *vi = page->mapping->host;
  2006. struct buffer_head *bh, *head;
  2007. unsigned int block_start, block_end, blocksize;
  2008. BOOL partial;
  2009. ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
  2010. "0x%lx, from = %u, to = %u.", vi->i_ino,
  2011. NTFS_I(vi)->type, page->index, from, to);
  2012. blocksize = 1 << vi->i_blkbits;
  2013. // FIXME: We need a whole slew of special cases in here for compressed
  2014. // files for example...
  2015. // For now, we know ntfs_prepare_write() would have failed so we can't
2016. // get here in any of the cases which we have to special case, so
2017. // this is just a ripped off, unrolled generic_commit_write().
  2018. bh = head = page_buffers(page);
  2019. block_start = 0;
  2020. partial = FALSE;
  2021. do {
  2022. block_end = block_start + blocksize;
  2023. if (block_end <= from || block_start >= to) {
  2024. if (!buffer_uptodate(bh))
  2025. partial = TRUE;
  2026. } else {
  2027. set_buffer_uptodate(bh);
  2028. mark_buffer_dirty(bh);
  2029. }
  2030. } while (block_start = block_end, (bh = bh->b_this_page) != head);
  2031. /*
  2032. * If this is a partial write which happened to make all buffers
  2033. * uptodate then we can optimize away a bogus ->readpage() for the next
  2034. * read(). Here we 'discover' whether the page went uptodate as a
  2035. * result of this (potentially partial) write.
  2036. */
  2037. if (!partial)
  2038. SetPageUptodate(page);
  2039. /*
  2040. * Not convinced about this at all. See disparity comment above. For
  2041. * now we know ntfs_prepare_write() would have failed in the write
  2042. * exceeds i_size case, so this will never trigger which is fine.
  2043. */
  2044. if (pos > i_size_read(vi)) {
  2045. ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
  2046. "not supported yet. Sorry.");
  2047. return -EOPNOTSUPP;
  2048. // vi->i_size = pos;
  2049. // mark_inode_dirty(vi);
  2050. }
  2051. ntfs_debug("Done.");
  2052. return 0;
  2053. }
  2054. /**
  2055. * ntfs_commit_write - commit the received data
  2056. *
  2057. * This is called from generic_file_write() with i_sem held on the inode
  2058. * (@page->mapping->host). The @page is locked but not kmap()ped. The source
  2059. * data has already been copied into the @page. ntfs_prepare_write() has been
2060. * called before the data was copied and it returned success so we can take the
  2061. * results of various BUG checks and some error handling for granted.
  2062. *
  2063. * Need to mark modified blocks dirty so they get written out later when
  2064. * ntfs_writepage() is invoked by the VM.
  2065. *
  2066. * Return 0 on success or -errno on error.
  2067. *
  2068. * Should be using generic_commit_write(). This marks buffers uptodate and
  2069. * dirty, sets the page uptodate if all buffers in the page are uptodate, and
  2070. * updates i_size if the end of io is beyond i_size. In that case, it also
  2071. * marks the inode dirty.
  2072. *
  2073. * Cannot use generic_commit_write() due to ntfs specialities but can look at
  2074. * it for implementation guidance.
  2075. *
  2076. * If things have gone as outlined in ntfs_prepare_write(), then we do not
  2077. * need to do any page content modifications here at all, except in the write
2078. * to resident attribute case, where we need to bring the page uptodate here,
2079. * which we combine with the copying into the mft record, saving
2080. * one atomic kmap.
  2081. */
  2082. static int ntfs_commit_write(struct file *file, struct page *page,
  2083. unsigned from, unsigned to)
  2084. {
  2085. struct inode *vi = page->mapping->host;
  2086. ntfs_inode *base_ni, *ni = NTFS_I(vi);
  2087. char *kaddr, *kattr;
  2088. ntfs_attr_search_ctx *ctx;
  2089. MFT_RECORD *m;
  2090. ATTR_RECORD *a;
  2091. u32 attr_len;
  2092. int err;
  2093. ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
  2094. "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
  2095. page->index, from, to);
  2096. /* If the attribute is not resident, deal with it elsewhere. */
  2097. if (NInoNonResident(ni)) {
  2098. /* Only unnamed $DATA attributes can be compressed/encrypted. */
  2099. if (ni->type == AT_DATA && !ni->name_len) {
  2100. /* Encrypted files need separate handling. */
  2101. if (NInoEncrypted(ni)) {
  2102. // We never get here at present!
  2103. BUG();
  2104. }
  2105. /* Compressed data streams are handled in compress.c. */
  2106. if (NInoCompressed(ni)) {
  2107. // TODO: Implement this!
  2108. // return ntfs_write_compressed_block(page);
  2109. // We never get here at present!
  2110. BUG();
  2111. }
  2112. }
  2113. /* Normal data stream. */
  2114. return ntfs_commit_nonresident_write(page, from, to);
  2115. }
  2116. /*
  2117. * Attribute is resident, implying it is not compressed, encrypted, or
  2118. * sparse.
  2119. */
  2120. if (!NInoAttr(ni))
  2121. base_ni = ni;
  2122. else
  2123. base_ni = ni->ext.base_ntfs_ino;
  2124. /* Map, pin, and lock the mft record. */
  2125. m = map_mft_record(base_ni);
  2126. if (IS_ERR(m)) {
  2127. err = PTR_ERR(m);
  2128. m = NULL;
  2129. ctx = NULL;
  2130. goto err_out;
  2131. }
  2132. ctx = ntfs_attr_get_search_ctx(base_ni, m);
  2133. if (unlikely(!ctx)) {
  2134. err = -ENOMEM;
  2135. goto err_out;
  2136. }
  2137. err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
  2138. CASE_SENSITIVE, 0, NULL, 0, ctx);
  2139. if (unlikely(err)) {
  2140. if (err == -ENOENT)
  2141. err = -EIO;
  2142. goto err_out;
  2143. }
  2144. a = ctx->attr;
  2145. /* The total length of the attribute value. */
  2146. attr_len = le32_to_cpu(a->data.resident.value_length);
  2147. BUG_ON(from > attr_len);
  2148. kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
  2149. kaddr = kmap_atomic(page, KM_USER0);
  2150. /* Copy the received data from the page to the mft record. */
  2151. memcpy(kattr + from, kaddr + from, to - from);
  2152. /* Update the attribute length if necessary. */
  2153. if (to > attr_len) {
  2154. attr_len = to;
  2155. a->data.resident.value_length = cpu_to_le32(attr_len);
  2156. }
  2157. /*
  2158. * If the page is not uptodate, bring the out of bounds area(s)
  2159. * uptodate by copying data from the mft record to the page.
  2160. */
  2161. if (!PageUptodate(page)) {
  2162. if (from > 0)
  2163. memcpy(kaddr, kattr, from);
  2164. if (to < attr_len)
  2165. memcpy(kaddr + to, kattr + to, attr_len - to);
  2166. /* Zero the region outside the end of the attribute value. */
  2167. if (attr_len < PAGE_CACHE_SIZE)
  2168. memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
  2169. /*
  2170. * The probability of not having done any of the above is
  2171. * extremely small, so we just flush unconditionally.
  2172. */
  2173. flush_dcache_page(page);
  2174. SetPageUptodate(page);
  2175. }
  2176. kunmap_atomic(kaddr, KM_USER0);
  2177. /* Update i_size if necessary. */
  2178. if (i_size_read(vi) < attr_len) {
  2179. unsigned long flags;
  2180. write_lock_irqsave(&ni->size_lock, flags);
  2181. ni->allocated_size = ni->initialized_size = attr_len;
  2182. i_size_write(vi, attr_len);
  2183. write_unlock_irqrestore(&ni->size_lock, flags);
  2184. }
  2185. /* Mark the mft record dirty, so it gets written back. */
  2186. flush_dcache_mft_record_page(ctx->ntfs_ino);
  2187. mark_mft_record_dirty(ctx->ntfs_ino);
  2188. ntfs_attr_put_search_ctx(ctx);
  2189. unmap_mft_record(base_ni);
  2190. ntfs_debug("Done.");
  2191. return 0;
  2192. err_out:
  2193. if (err == -ENOMEM) {
  2194. ntfs_warning(vi->i_sb, "Error allocating memory required to "
  2195. "commit the write.");
  2196. if (PageUptodate(page)) {
  2197. ntfs_warning(vi->i_sb, "Page is uptodate, setting "
  2198. "dirty so the write will be retried "
  2199. "later on by the VM.");
  2200. /*
  2201. * Put the page on mapping->dirty_pages, but leave its
  2202. * buffers' dirty state as-is.
  2203. */
  2204. __set_page_dirty_nobuffers(page);
  2205. err = 0;
  2206. } else
  2207. ntfs_error(vi->i_sb, "Page is not uptodate. Written "
  2208. "data has been lost.");
  2209. } else {
  2210. ntfs_error(vi->i_sb, "Resident attribute commit write failed "
  2211. "with error %i.", err);
  2212. NVolSetErrors(ni->vol);
  2213. make_bad_inode(vi);
  2214. }
  2215. if (ctx)
  2216. ntfs_attr_put_search_ctx(ctx);
  2217. if (m)
  2218. unmap_mft_record(base_ni);
  2219. return err;
  2220. }
  2221. #endif /* NTFS_RW */
  2222. /**
  2223. * ntfs_aops - general address space operations for inodes and attributes
  2224. */
  2225. struct address_space_operations ntfs_aops = {
  2226. .readpage = ntfs_readpage, /* Fill page with data. */
  2227. .sync_page = block_sync_page, /* Currently, just unplugs the
  2228. disk request queue. */
  2229. #ifdef NTFS_RW
  2230. .writepage = ntfs_writepage, /* Write dirty page to disk. */
  2231. .prepare_write = ntfs_prepare_write, /* Prepare page and buffers
  2232. ready to receive data. */
  2233. .commit_write = ntfs_commit_write, /* Commit received data. */
  2234. #endif /* NTFS_RW */
  2235. };
  2236. /**
2237. * ntfs_mst_aops - general address space operations for mst protected inodes
  2238. * and attributes
  2239. */
  2240. struct address_space_operations ntfs_mst_aops = {
  2241. .readpage = ntfs_readpage, /* Fill page with data. */
  2242. .sync_page = block_sync_page, /* Currently, just unplugs the
  2243. disk request queue. */
  2244. #ifdef NTFS_RW
  2245. .writepage = ntfs_writepage, /* Write dirty page to disk. */
  2246. .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
  2247. without touching the buffers
  2248. belonging to the page. */
  2249. #endif /* NTFS_RW */
  2250. };
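/*
 * Sketch of how an inode gets wired to one of the two operation sets
 * above; the actual selection happens in the inode setup code (inode.c)
 * and is roughly:
 *
 *	if (NInoMstProtected(ni))
 *		vi->i_mapping->a_ops = &ntfs_mst_aops;
 *	else
 *		vi->i_mapping->a_ops = &ntfs_aops;
 */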
  2251. #ifdef NTFS_RW
  2252. /**
  2253. * mark_ntfs_record_dirty - mark an ntfs record dirty
  2254. * @page: page containing the ntfs record to mark dirty
  2255. * @ofs: byte offset within @page at which the ntfs record begins
  2256. *
  2257. * Set the buffers and the page in which the ntfs record is located dirty.
  2258. *
  2259. * The latter also marks the vfs inode the ntfs record belongs to dirty
  2260. * (I_DIRTY_PAGES only).
  2261. *
  2262. * If the page does not have buffers, we create them and set them uptodate.
  2263. * The page may not be locked which is why we need to handle the buffers under
  2264. * the mapping->private_lock. Once the buffers are marked dirty we no longer
  2265. * need the lock since try_to_free_buffers() does not free dirty buffers.
  2266. */
  2267. void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
  2268. struct address_space *mapping = page->mapping;
  2269. ntfs_inode *ni = NTFS_I(mapping->host);
  2270. struct buffer_head *bh, *head, *buffers_to_free = NULL;
  2271. unsigned int end, bh_size, bh_ofs;
  2272. BUG_ON(!PageUptodate(page));
  2273. end = ofs + ni->itype.index.block_size;
  2274. bh_size = 1 << VFS_I(ni)->i_blkbits;
  2275. spin_lock(&mapping->private_lock);
  2276. if (unlikely(!page_has_buffers(page))) {
  2277. spin_unlock(&mapping->private_lock);
  2278. bh = head = alloc_page_buffers(page, bh_size, 1);
  2279. spin_lock(&mapping->private_lock);
  2280. if (likely(!page_has_buffers(page))) {
  2281. struct buffer_head *tail;
  2282. do {
  2283. set_buffer_uptodate(bh);
  2284. tail = bh;
  2285. bh = bh->b_this_page;
  2286. } while (bh);
  2287. tail->b_this_page = head;
  2288. attach_page_buffers(page, head);
  2289. } else
  2290. buffers_to_free = bh;
  2291. }
  2292. bh = head = page_buffers(page);
  2293. BUG_ON(!bh);
  2294. do {
  2295. bh_ofs = bh_offset(bh);
  2296. if (bh_ofs + bh_size <= ofs)
  2297. continue;
  2298. if (unlikely(bh_ofs >= end))
  2299. break;
  2300. set_buffer_dirty(bh);
  2301. } while ((bh = bh->b_this_page) != head);
  2302. spin_unlock(&mapping->private_lock);
  2303. __set_page_dirty_nobuffers(page);
  2304. if (unlikely(buffers_to_free)) {
  2305. do {
  2306. bh = buffers_to_free->b_this_page;
  2307. free_buffer_head(buffers_to_free);
  2308. buffers_to_free = bh;
  2309. } while (buffers_to_free);
  2310. }
  2311. }
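/*
 * Illustrative usage sketch (hypothetical caller, not taken from this
 * file): after modifying an ntfs record that lives in an uptodate page
 * cache page, a caller would dirty it along the lines of:
 *
 *	u8 *kaddr = page_address(page);
 *	unsigned int ofs = (u8*)rec - kaddr;	// offset of the record
 *	mark_ntfs_record_dirty(page, ofs);
 *
 * where rec is a pointer to the modified record inside the mapped page.
 * This dirties the buffers covering the record as well as the page, so
 * the record gets written out on the next writeback pass.
 */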
  2312. #endif /* NTFS_RW */