/**
 * aops.c - NTFS kernel address space operations and page cache handling.
 *	    Part of the Linux-NTFS project.
 *
 * Copyright (c) 2001-2004 Anton Altaparmakov
 * Copyright (c) 2002 Richard Russon
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the Linux-NTFS
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>

#include "aops.h"
#include "attrib.h"
#include "debug.h"
#include "inode.h"
#include "mft.h"
#include "runlist.h"
#include "types.h"
#include "ntfs.h"

/**
 * ntfs_end_buffer_async_read - async io completion for reading attributes
 * @bh:		buffer head on which io is completed
 * @uptodate:	whether @bh is now uptodate or not
 *
 * Asynchronous I/O completion handler for reading pages belonging to the
 * attribute address space of an inode. The inodes can either be files or
 * directories or they can be fake inodes describing some attribute.
 *
 * If NInoMstProtected(), perform the post read mst fixups when all IO on the
 * page has been completed and mark the page uptodate or set the error bit on
 * the page. To determine the size of the records that need fixing up, we
 * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
 * record size, and index_block_size_bits, to the log(base 2) of the ntfs
 * record size.
 */
static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	static DEFINE_SPINLOCK(page_uptodate_lock);
	unsigned long flags;
	struct buffer_head *tmp;
	struct page *page;
	ntfs_inode *ni;
	int page_uptodate = 1;

	page = bh->b_page;
	ni = NTFS_I(page->mapping->host);

	if (likely(uptodate)) {
		s64 file_ofs;

		set_buffer_uptodate(bh);

		file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
				bh_offset(bh);
		/* Check for the current buffer head overflowing. */
		if (file_ofs + bh->b_size > ni->initialized_size) {
			char *addr;
			int ofs = 0;

			if (file_ofs < ni->initialized_size)
				ofs = ni->initialized_size - file_ofs;
			addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
			memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
			flush_dcache_page(page);
			kunmap_atomic(addr, KM_BIO_SRC_IRQ);
		}
	} else {
		clear_buffer_uptodate(bh);
		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
				(unsigned long long)bh->b_blocknr);
		SetPageError(page);
	}

	spin_lock_irqsave(&page_uptodate_lock, flags);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			if (likely(buffer_locked(tmp)))
				goto still_busy;
			/* Async buffers must be locked. */
			BUG();
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	/*
	 * If none of the buffers had errors then we can set the page uptodate,
	 * but we first have to perform the post read mst fixups, if the
	 * attribute is mst protected, i.e. if NInoMstProtected(ni) is true.
	 * Note we ignore fixup errors as those are detected when
	 * map_mft_record() is called which gives us per record granularity
	 * rather than per page granularity.
	 */
	if (!NInoMstProtected(ni)) {
		if (likely(page_uptodate && !PageError(page)))
			SetPageUptodate(page);
	} else {
		char *addr;
		unsigned int i, recs;
		u32 rec_size;

		rec_size = ni->itype.index.block_size;
		recs = PAGE_CACHE_SIZE / rec_size;
		/* Should have been verified before we got here... */
		BUG_ON(!recs);
		addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
		for (i = 0; i < recs; i++)
			post_read_mst_fixup((NTFS_RECORD*)(addr +
					i * rec_size), rec_size);
		flush_dcache_page(page);
		kunmap_atomic(addr, KM_BIO_SRC_IRQ);
		if (likely(!PageError(page) && page_uptodate))
			SetPageUptodate(page);
	}
	unlock_page(page);
	return;
still_busy:
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	return;
}
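
/*
 * Illustrative sketch, not part of the driver: the record geometry the fixup
 * loop above relies on. A page cache page holds an integral number of mst
 * protected records, with record i starting at byte offset i * rec_size into
 * the page. The helper below is a hypothetical, compiled-out restatement of
 * that arithmetic.
 */
#if 0	/* example only */
static void example_fixup_page_records(u8 *page_addr, u32 rec_size)
{
	unsigned int i, recs = PAGE_CACHE_SIZE / rec_size;

	/* E.g. 4096 byte page, 1024 byte index records => recs = 4. */
	for (i = 0; i < recs; i++)
		post_read_mst_fixup((NTFS_RECORD*)(page_addr + i * rec_size),
				rec_size);
}
#endif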

/**
 * ntfs_read_block - fill a @page of an address space with data
 * @page:	page cache page to fill with data
 *
 * Fill the page @page of the address space belonging to the @page->host inode.
 * We read each buffer asynchronously and when all buffers are read in, our io
 * completion handler ntfs_end_buffer_async_read(), if required, automatically
 * applies the mst fixups to the page before finally marking it uptodate and
 * unlocking it.
 *
 * We only enforce the allocated_size limit because i_size is checked for in
 * generic_file_read().
 *
 * Return 0 on success and -errno on error.
 *
 * Contains an adapted version of fs/buffer.c::block_read_full_page().
 */
static int ntfs_read_block(struct page *page)
{
	VCN vcn;
	LCN lcn;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	sector_t iblock, lblock, zblock;
	unsigned int blocksize, vcn_ofs;
	int i, nr;
	unsigned char blocksize_bits;

	ni = NTFS_I(page->mapping->host);
	vol = ni->vol;

	/* $MFT/$DATA must have its complete runlist in memory at all times. */
	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));

	blocksize_bits = VFS_I(ni)->i_blkbits;
	blocksize = 1 << blocksize_bits;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	bh = head = page_buffers(page);
	if (unlikely(!bh)) {
		unlock_page(page);
		return -ENOMEM;
	}

	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;

	/* Loop through all the buffers in the page. */
	rl = NULL;
	nr = i = 0;
	do {
		u8 *kaddr;

		if (unlikely(buffer_uptodate(bh)))
			continue;
		if (unlikely(buffer_mapped(bh))) {
			arr[nr++] = bh;
			continue;
		}
		bh->b_bdev = vol->sb->s_bdev;
		/* Is the block within the allowed limits? */
		if (iblock < lblock) {
			BOOL is_retry = FALSE;

			/* Convert iblock into corresponding vcn and offset. */
			vcn = (VCN)iblock << blocksize_bits >>
					vol->cluster_size_bits;
			vcn_ofs = ((VCN)iblock << blocksize_bits) &
					vol->cluster_size_mask;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (lcn >= 0) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn << vol->cluster_size_bits)
						+ vcn_ofs) >> blocksize_bits;
				set_buffer_mapped(bh);
				/* Only read initialized data blocks. */
				if (iblock < zblock) {
					arr[nr++] = bh;
					continue;
				}
				/* Fully non-initialized data block, zero it. */
				goto handle_zblock;
			}
			/* It is a hole, need to zero it. */
			if (lcn == LCN_HOLE)
				goto handle_hole;
			/* If first try and runlist unmapped, map and retry. */
			if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				int err;

				is_retry = TRUE;
				/*
				 * Attempt to map runlist, dropping lock for
				 * the duration.
				 */
				up_read(&ni->runlist.lock);
				err = ntfs_map_runlist(ni, vcn);
				if (likely(!err))
					goto lock_retry_remap;
				rl = NULL;
				lcn = err;
			}
			/* Hard error, zero out region. */
			bh->b_blocknr = -1;
			SetPageError(page);
			ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
					"attribute type 0x%x, vcn 0x%llx, "
					"offset 0x%x because its location on "
					"disk could not be determined%s "
					"(error code %lli).", ni->mft_no,
					ni->type, (unsigned long long)vcn,
					vcn_ofs, is_retry ? " even after "
					"retrying" : "", (long long)lcn);
		}
		/*
		 * Either iblock was outside lblock limits or
		 * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
		 * of the page and set the buffer uptodate.
		 */
handle_hole:
		bh->b_blocknr = -1UL;
		clear_buffer_mapped(bh);
handle_zblock:
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr + i * blocksize, 0, blocksize);
		flush_dcache_page(page);
		kunmap_atomic(kaddr, KM_USER0);
		set_buffer_uptodate(bh);
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* Check we have at least one buffer ready for i/o. */
	if (nr) {
		struct buffer_head *tbh;

		/* Lock the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			lock_buffer(tbh);
			tbh->b_end_io = ntfs_end_buffer_async_read;
			set_buffer_async_read(tbh);
		}
		/* Finally, start i/o on the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			if (likely(!buffer_uptodate(tbh)))
				submit_bh(READ, tbh);
			else
				ntfs_end_buffer_async_read(tbh, 1);
		}
		return 0;
	}

	/* No i/o was scheduled on any of the buffers. */
	if (likely(!PageError(page)))
		SetPageUptodate(page);
	else /* Signal synchronous i/o error. */
		nr = -EIO;
	unlock_page(page);
	return nr;
}
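
/*
 * Illustrative sketch, not part of the driver: the block <-> cluster
 * arithmetic used by the remap code above (and again by the write paths
 * below). Given a block number within the attribute, the vcn and the byte
 * offset into that vcn follow from the volume's cluster geometry; after a
 * successful vcn->lcn lookup the device block number is derived. The helper
 * is hypothetical and compiled out.
 */
#if 0	/* example only */
static sector_t example_block_to_dev_block(ntfs_volume *vol, sector_t iblock,
		unsigned char blocksize_bits, LCN lcn)
{
	VCN vcn = (VCN)iblock << blocksize_bits >> vol->cluster_size_bits;
	unsigned int vcn_ofs = ((VCN)iblock << blocksize_bits) &
			vol->cluster_size_mask;

	/*
	 * E.g. 512 byte blocks (bits = 9), 4096 byte clusters (bits = 12):
	 * iblock 10 => byte offset 0x1400 => vcn 1, vcn_ofs 0x400.
	 */
	(void)vcn;	/* The caller would look up @lcn from @vcn. */
	return ((lcn << vol->cluster_size_bits) + vcn_ofs) >> blocksize_bits;
}
#endif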

/**
 * ntfs_readpage - fill a @page of a @file with data from the device
 * @file:	open file to which the page @page belongs or NULL
 * @page:	page cache page to fill with data
 *
 * For non-resident attributes, ntfs_readpage() fills the @page of the open
 * file @file by calling the ntfs version of the generic block_read_full_page()
 * function, ntfs_read_block(), which in turn creates and reads in the buffers
 * associated with the page asynchronously.
 *
 * For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
 * data from the mft record (which at this stage is most likely in memory) and
 * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
 * even if the mft record is not cached at this point in time, we need to wait
 * for it to be read in before we can do the copy.
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_readpage(struct file *file, struct page *page)
{
	loff_t i_size;
	ntfs_inode *ni, *base_ni;
	u8 *kaddr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *mrec;
	u32 attr_len;
	int err = 0;

	BUG_ON(!PageLocked(page));
	/*
	 * This can potentially happen because we clear PageUptodate() during
	 * ntfs_writepage() of MstProtected() attributes.
	 */
	if (PageUptodate(page)) {
		unlock_page(page);
		return 0;
	}
	ni = NTFS_I(page->mapping->host);

	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/*
		 * Only unnamed $DATA attributes can be compressed or
		 * encrypted.
		 */
		if (ni->type == AT_DATA && !ni->name_len) {
			/* If file is encrypted, deny access, just like NT4. */
			if (NInoEncrypted(ni)) {
				err = -EACCES;
				goto err_out;
			}
			/* Compressed data streams are handled in compress.c. */
			if (NInoCompressed(ni))
				return ntfs_read_compressed_block(page);
		}
		/* Normal data stream. */
		return ntfs_read_block(page);
	}
	/*
	 * Attribute is resident, implying it is not compressed or encrypted.
	 * This also means the attribute is smaller than an mft record and
	 * hence smaller than a page, so can simply zero out any pages with
	 * index above 0. We can also do this if the file size is 0.
	 */
	if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) {
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr, 0, PAGE_CACHE_SIZE);
		flush_dcache_page(page);
		kunmap_atomic(kaddr, KM_USER0);
		goto done;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;

	/* Map, pin, and lock the mft record. */
	mrec = map_mft_record(base_ni);
	if (IS_ERR(mrec)) {
		err = PTR_ERR(mrec);
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto unm_err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto put_unm_err_out;
	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	i_size = i_size_read(VFS_I(ni));
	if (unlikely(attr_len > i_size))
		attr_len = i_size;
	kaddr = kmap_atomic(page, KM_USER0);
	/* Copy the data to the page. */
	memcpy(kaddr, (u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			attr_len);
	/* Zero the remainder of the page. */
	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);
put_unm_err_out:
	ntfs_attr_put_search_ctx(ctx);
unm_err_out:
	unmap_mft_record(base_ni);
done:
	SetPageUptodate(page);
err_out:
	unlock_page(page);
	return err;
}
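
/*
 * Illustrative sketch, not part of the driver: the resident attribute layout
 * ntfs_readpage() copies from. The attribute value lives inside the mft
 * record at value_offset bytes from the start of the attribute record and is
 * value_length bytes long; the page then holds the value followed by zeroes.
 * The helper is hypothetical and compiled out.
 */
#if 0	/* example only */
static void example_copy_resident_value(ATTR_RECORD *attr, u8 *kaddr,
		u32 attr_len)
{
	u8 *value = (u8*)attr +
			le16_to_cpu(attr->data.resident.value_offset);

	memcpy(kaddr, value, attr_len);		/* The attribute value... */
	memset(kaddr + attr_len, 0,		/* ...then zeroes. */
			PAGE_CACHE_SIZE - attr_len);
}
#endif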

#ifdef NTFS_RW

/**
 * ntfs_write_block - write a @page to the backing store
 * @page:	page cache page to write out
 * @wbc:	writeback control structure
 *
 * This function is for writing pages belonging to non-resident, non-mst
 * protected attributes to their backing store.
 *
 * For a page with buffers, map and write the dirty buffers asynchronously
 * under page writeback. For a page without buffers, create buffers for the
 * page, then proceed as above.
 *
 * If a page doesn't have buffers the page dirty state is definitive. If a
 * page does have buffers, the page dirty state is just a hint, and the buffer
 * dirty state is definitive. (A hint which has rules: dirty buffers against
 * a clean page is illegal; other combinations are legal and need to be
 * handled, in particular a dirty page containing clean buffers.)
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_read_block() and __block_write_full_page().
 */
static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
{
	VCN vcn;
	LCN lcn;
	sector_t block, dblock, iblock;
	struct inode *vi;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head;
	unsigned int blocksize, vcn_ofs;
	int err;
	BOOL need_end_writeback;
	unsigned char blocksize_bits;

	vi = page->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", ni->mft_no, ni->type, page->index);

	BUG_ON(!NInoNonResident(ni));
	BUG_ON(NInoMstProtected(ni));

	blocksize_bits = vi->i_blkbits;
	blocksize = 1 << blocksize_bits;

	if (!page_has_buffers(page)) {
		BUG_ON(!PageUptodate(page));
		create_empty_buffers(page, blocksize,
				(1 << BH_Uptodate) | (1 << BH_Dirty));
	}
	bh = head = page_buffers(page);
	if (unlikely(!bh)) {
		ntfs_warning(vol->sb, "Error allocating page buffers. "
				"Redirtying page so we try again later.");
		/*
		 * Put the page back on mapping->dirty_pages, but leave its
		 * buffers' dirty state as-is.
		 */
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}

	/* NOTE: Different naming scheme to ntfs_read_block()! */

	/* The first block in the page. */
	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);

	/* The first out of bounds block for the data size. */
	dblock = (vi->i_size + blocksize - 1) >> blocksize_bits;

	/* The last (fully or partially) initialized block. */
	iblock = ni->initialized_size >> blocksize_bits;

	/*
	 * Be very careful. We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time. If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */

	/*
	 * Loop through all the buffers in the page, mapping all the dirty
	 * buffers to disk addresses and handling any aliases from the
	 * underlying block device's mapping.
	 */
	rl = NULL;
	err = 0;
	do {
		BOOL is_retry = FALSE;

		if (unlikely(block >= dblock)) {
			/*
			 * Mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress. The contents of such buffers
			 * were zeroed by ntfs_writepage().
			 *
			 * FIXME: What about the small race window where
			 * ntfs_writepage() has not done any clearing because
			 * the page was within i_size but before we get here,
			 * vmtruncate() modifies i_size?
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}

		/* Clean buffers are not written out, so no need to map them. */
		if (!buffer_dirty(bh))
			continue;

		/* Make sure we have enough initialized size. */
		if (unlikely((block >= iblock) &&
				(ni->initialized_size < vi->i_size))) {
			/*
			 * If this page is fully outside initialized size, zero
			 * out all pages between the current initialized size
			 * and the current page. Just use ntfs_readpage() to do
			 * the zeroing transparently.
			 */
			if (block > iblock) {
				// TODO:
				// For each page do:
				// - read_cache_page()
				// Again for each page do:
				// - wait_on_page_locked()
				// - Check (PageUptodate(page) &&
				//	!PageError(page))
				// Update initialized size in the attribute and
				// in the inode.
				// Again, for each page do:
				//	__set_page_dirty_buffers();
				//	page_cache_release()
				// We don't need to wait on the writes.
				// Update iblock.
			}
			/*
			 * The current page straddles initialized size. Zero
			 * all non-uptodate buffers and set them uptodate (and
			 * dirty?). Note, there aren't any non-uptodate buffers
			 * if the page is uptodate.
			 * FIXME: For an uptodate page, the buffers may need to
			 * be written out because they were not initialized on
			 * disk before.
			 */
			if (!PageUptodate(page)) {
				// TODO:
				// Zero any non-uptodate buffers up to i_size.
				// Set them uptodate and dirty.
			}
			// TODO:
			// Update initialized size in the attribute and in the
			// inode (up to i_size).
			// Update iblock.
			// FIXME: This is inefficient. Try to batch the two
			// size changes to happen in one go.
			ntfs_error(vol->sb, "Writing beyond initialized size "
					"is not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
			// Do NOT set_buffer_new() BUT DO clear buffer range
			// outside write request range.
			// set_buffer_uptodate() on complete buffers as well as
			// set_buffer_dirty().
		}

		/* No need to map buffers that are already mapped. */
		if (buffer_mapped(bh))
			continue;

		/* Unmapped, dirty buffer. Need to map it. */
		bh->b_bdev = vol->sb->s_bdev;

		/* Convert block into corresponding vcn and offset. */
		vcn = (VCN)block << blocksize_bits;
		vcn_ofs = vcn & vol->cluster_size_mask;
		vcn >>= vol->cluster_size_bits;
		if (!rl) {
lock_retry_remap:
			down_read(&ni->runlist.lock);
			rl = ni->runlist.rl;
		}
		if (likely(rl != NULL)) {
			/* Seek to element containing target vcn. */
			while (rl->length && rl[1].vcn <= vcn)
				rl++;
			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
		} else
			lcn = LCN_RL_NOT_MAPPED;

		/* Successful remap. */
		if (lcn >= 0) {
			/* Setup buffer head to point to correct block. */
			bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
					vcn_ofs) >> blocksize_bits;
			set_buffer_mapped(bh);
			continue;
		}

		/* It is a hole, need to instantiate it. */
		if (lcn == LCN_HOLE) {
			// TODO: Instantiate the hole.
			// clear_buffer_new(bh);
			// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
			ntfs_error(vol->sb, "Writing into sparse regions is "
					"not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
		}

		/* If first try and runlist unmapped, map and retry. */
		if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
			is_retry = TRUE;
			/*
			 * Attempt to map runlist, dropping lock for
			 * the duration.
			 */
			up_read(&ni->runlist.lock);
			err = ntfs_map_runlist(ni, vcn);
			if (likely(!err))
				goto lock_retry_remap;
			rl = NULL;
			lcn = err;
		}

		/* Failed to map the buffer, even after retrying. */
		bh->b_blocknr = -1;
		ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				"because its location on disk could not be "
				"determined%s (error code %lli).", ni->mft_no,
				ni->type, (unsigned long long)vcn,
				vcn_ofs, is_retry ? " even after "
				"retrying" : "", (long long)lcn);
		if (!err)
			err = -EIO;
		break;
	} while (block++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* For the error case, need to reset bh to the beginning. */
	bh = head;

	/* Just an optimization, so ->readpage() isn't called later. */
	if (unlikely(!PageUptodate(page))) {
		int uptodate = 1;

		do {
			if (!buffer_uptodate(bh)) {
				uptodate = 0;
				bh = head;
				break;
			}
		} while ((bh = bh->b_this_page) != head);
		if (uptodate)
			SetPageUptodate(page);
	}

	/* Setup all mapped, dirty buffers for async write i/o. */
	do {
		get_bh(bh);
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			lock_buffer(bh);
			if (test_clear_buffer_dirty(bh)) {
				BUG_ON(!buffer_uptodate(bh));
				mark_buffer_async_write(bh);
			} else
				unlock_buffer(bh);
		} else if (unlikely(err)) {
			/*
			 * For the error case. The buffer may have been set
			 * dirty during attachment to a dirty page.
			 */
			if (err != -ENOMEM)
				clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	if (unlikely(err)) {
		// TODO: Remove the -EOPNOTSUPP check later on...
		if (unlikely(err == -EOPNOTSUPP))
			err = 0;
		else if (err == -ENOMEM) {
			ntfs_warning(vol->sb, "Error allocating memory. "
					"Redirtying page so we try again "
					"later.");
			/*
			 * Put the page back on mapping->dirty_pages, but
			 * leave its buffers' dirty state as-is.
			 */
			redirty_page_for_writepage(wbc, page);
			err = 0;
		} else
			SetPageError(page);
	}

	BUG_ON(PageWriteback(page));
	set_page_writeback(page);	/* Keeps try_to_free_buffers() away. */
	unlock_page(page);

	/*
	 * Submit the prepared buffers for i/o. Note the page is unlocked,
	 * and the async write i/o completion handler can end_page_writeback()
	 * at any time after the *first* submit_bh(). So the buffers can then
	 * disappear...
	 */
	need_end_writeback = TRUE;
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(WRITE, bh);
			need_end_writeback = FALSE;
		}
		put_bh(bh);
		bh = next;
	} while (bh != head);

	/* If no i/o was started, need to end_page_writeback(). */
	if (unlikely(need_end_writeback))
		end_page_writeback(page);

	ntfs_debug("Done.");
	return err;
}
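
/*
 * Illustrative sketch, not part of the driver: the dblock/iblock boundary
 * arithmetic used above, with worked numbers (the sample sizes are made up
 * for the example). Compiled out.
 */
#if 0	/* example only */
static void example_write_block_bounds(void)
{
	const unsigned char blocksize_bits = 9;		/* 512 byte blocks. */
	const unsigned int blocksize = 1 << blocksize_bits;
	loff_t i_size = 1234;		/* Sample data size. */
	s64 initialized_size = 1100;	/* Sample initialized size. */

	/* First out of bounds block: (1234 + 511) >> 9 = 3. */
	sector_t dblock = (i_size + blocksize - 1) >> blocksize_bits;
	/*
	 * Last (fully or partially) initialized block: 1100 >> 9 = 2, i.e.
	 * block 2 contains initialized bytes 1024..1099 only.
	 */
	sector_t iblock = initialized_size >> blocksize_bits;

	(void)dblock;
	(void)iblock;
}
#endif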

/**
 * ntfs_write_mst_block - write a @page to the backing store
 * @page:	page cache page to write out
 * @wbc:	writeback control structure
 *
 * This function is for writing pages belonging to non-resident, mst protected
 * attributes to their backing store. The only supported attributes are index
 * allocation and $MFT/$DATA. Both directory inodes and index inodes are
 * supported for the index allocation case.
 *
 * The page must remain locked for the duration of the write because we apply
 * the mst fixups, write, and then undo the fixups, so if we were to unlock the
 * page before undoing the fixups, any other user of the page will see the
 * page contents as corrupt.
 *
 * We clear the page uptodate flag for the duration of the function to ensure
 * exclusion for the $MFT/$DATA case against someone mapping an mft record we
 * are about to apply the mst fixups to.
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_write_block(), ntfs_mft_writepage(), and
 * write_mft_record_nolock().
 */
static int ntfs_write_mst_block(struct page *page,
		struct writeback_control *wbc)
{
	sector_t block, dblock, rec_block;
	struct inode *vi = page->mapping->host;
	ntfs_inode *ni = NTFS_I(vi);
	ntfs_volume *vol = ni->vol;
	u8 *kaddr;
	unsigned char bh_size_bits = vi->i_blkbits;
	unsigned int bh_size = 1 << bh_size_bits;
	unsigned int rec_size = ni->itype.index.block_size;
	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
	int max_bhs = PAGE_CACHE_SIZE / bh_size;
	struct buffer_head *bhs[max_bhs];
	runlist_element *rl;
	int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2;
	unsigned rec_size_bits;
	BOOL sync, is_mft, page_is_dirty, rec_is_dirty;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", vi->i_ino, ni->type, page->index);
	BUG_ON(!NInoNonResident(ni));
	BUG_ON(!NInoMstProtected(ni));
	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
	/*
	 * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
	 * in its page cache were to be marked dirty. However this should
	 * never happen with the current driver and considering we do not
	 * handle this case here we do want to BUG(), at least for now.
	 */
	BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
			(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
	BUG_ON(!max_bhs);

	/* Were we called for sync purposes? */
	sync = (wbc->sync_mode == WB_SYNC_ALL);

	/* Make sure we have mapped buffers. */
	BUG_ON(!page_has_buffers(page));
	bh = head = page_buffers(page);
	BUG_ON(!bh);

	rec_size_bits = ni->itype.index.block_size_bits;
	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
	bhs_per_rec = rec_size >> bh_size_bits;
	BUG_ON(!bhs_per_rec);

	/* The first block in the page. */
	rec_block = block = (sector_t)page->index <<
			(PAGE_CACHE_SHIFT - bh_size_bits);

	/* The first out of bounds block for the data size. */
	dblock = (vi->i_size + bh_size - 1) >> bh_size_bits;

	rl = NULL;
	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
	page_is_dirty = rec_is_dirty = FALSE;
	rec_start_bh = NULL;
	do {
		BOOL is_retry = FALSE;

		if (likely(block < rec_block)) {
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				continue;
			}
			/*
			 * This block is not the first one in the record. We
			 * ignore the buffer's dirty state because we could
			 * have raced with a parallel mark_ntfs_record_dirty().
			 */
			if (!rec_is_dirty)
				continue;
			if (unlikely(err2)) {
				if (err2 != -ENOMEM)
					clear_buffer_dirty(bh);
				continue;
			}
		} else /* if (block == rec_block) */ {
			BUG_ON(block > rec_block);
			/* This block is the first one in the record. */
			rec_block += bhs_per_rec;
			err2 = 0;
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				continue;
			}
			if (!buffer_dirty(bh)) {
				/* Clean records are not written out. */
				rec_is_dirty = FALSE;
				continue;
			}
			rec_is_dirty = TRUE;
			rec_start_bh = bh;
		}
		/* Need to map the buffer if it is not mapped already. */
		if (unlikely(!buffer_mapped(bh))) {
			VCN vcn;
			LCN lcn;
			unsigned int vcn_ofs;

			/* Obtain the vcn and offset of the current block. */
			vcn = (VCN)block << bh_size_bits;
			vcn_ofs = vcn & vol->cluster_size_mask;
			vcn >>= vol->cluster_size_bits;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (likely(lcn >= 0)) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn <<
						vol->cluster_size_bits) +
						vcn_ofs) >> bh_size_bits;
				set_buffer_mapped(bh);
			} else {
				/*
				 * Remap failed. Retry to map the runlist once
				 * unless we are working on $MFT which always
				 * has the whole of its runlist in memory.
				 */
				if (!is_mft && !is_retry &&
						lcn == LCN_RL_NOT_MAPPED) {
					is_retry = TRUE;
					/*
					 * Attempt to map runlist, dropping
					 * lock for the duration.
					 */
					up_read(&ni->runlist.lock);
					err2 = ntfs_map_runlist(ni, vcn);
					if (likely(!err2))
						goto lock_retry_remap;
					if (err2 == -ENOMEM)
						page_is_dirty = TRUE;
					lcn = err2;
				} else
					err2 = -EIO;
				/* Hard error. Abort writing this record. */
				if (!err || err == -ENOMEM)
					err = err2;
				bh->b_blocknr = -1;
				ntfs_error(vol->sb, "Cannot write ntfs record "
						"0x%llx (inode 0x%lx, "
						"attribute type 0x%x) because "
						"its location on disk could "
						"not be determined (error "
						"code %lli).", (s64)block <<
						bh_size_bits >>
						vol->mft_record_size_bits,
						ni->mft_no, ni->type,
						(long long)lcn);
				/*
				 * If this is not the first buffer, remove the
				 * buffers in this record from the list of
				 * buffers to write and clear their dirty bit
				 * if not error -ENOMEM.
				 */
				if (rec_start_bh != bh) {
					while (bhs[--nr_bhs] != rec_start_bh)
						;
					if (err2 != -ENOMEM) {
						do {
							clear_buffer_dirty(
								rec_start_bh);
						} while ((rec_start_bh =
								rec_start_bh->
								b_this_page) !=
								bh);
					}
				}
				continue;
			}
		}
		BUG_ON(!buffer_uptodate(bh));
		BUG_ON(nr_bhs >= max_bhs);
		bhs[nr_bhs++] = bh;
	} while (block++, (bh = bh->b_this_page) != head);
	if (unlikely(rl))
		up_read(&ni->runlist.lock);
	/* If there were no dirty buffers, we are done. */
	if (!nr_bhs)
		goto done;
	/* Map the page so we can access its contents. */
	kaddr = kmap(page);
	/* Clear the page uptodate flag whilst the mst fixups are applied. */
	BUG_ON(!PageUptodate(page));
	ClearPageUptodate(page);
	for (i = 0; i < nr_bhs; i++) {
		unsigned int ofs;

		/* Skip buffers which are not at the beginning of records. */
		if (i % bhs_per_rec)
			continue;
		tbh = bhs[i];
		ofs = bh_offset(tbh);
		if (is_mft) {
			ntfs_inode *tni;
			unsigned long mft_no;

			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
					>> rec_size_bits;
			/* Check whether to write this mft record. */
			tni = NULL;
			if (!ntfs_may_write_mft_record(vol, mft_no,
					(MFT_RECORD*)(kaddr + ofs), &tni)) {
				/*
				 * The record should not be written. This
				 * means we need to redirty the page before
				 * returning.
				 */
				page_is_dirty = TRUE;
				/*
				 * Remove the buffers in this mft record from
				 * the list of buffers to write.
				 */
				do {
					bhs[i] = NULL;
				} while (++i % bhs_per_rec);
				continue;
			}
			/*
			 * The record should be written. If a locked ntfs
			 * inode was returned, add it to the array of locked
			 * ntfs inodes.
			 */
			if (tni)
				locked_nis[nr_locked_nis++] = tni;
		}
		/* Apply the mst protection fixups. */
		err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				rec_size);
		if (unlikely(err2)) {
			if (!err || err == -ENOMEM)
				err = -EIO;
			ntfs_error(vol->sb, "Failed to apply mst fixups "
					"(inode 0x%lx, attribute type 0x%x, "
					"page index 0x%lx, page offset 0x%x)!"
					" Unmount and run chkdsk.", vi->i_ino,
					ni->type, page->index, ofs);
			/*
			 * Mark all the buffers in this record clean as we do
			 * not want to write corrupt data to disk.
			 */
			do {
				clear_buffer_dirty(bhs[i]);
				bhs[i] = NULL;
			} while (++i % bhs_per_rec);
			continue;
		}
		nr_recs++;
	}
	/* If no records are to be written out, we are done. */
	if (!nr_recs)
		goto unm_done;
	flush_dcache_page(page);
	/* Lock buffers and start synchronous write i/o on them. */
	for (i = 0; i < nr_bhs; i++) {
		tbh = bhs[i];
		if (!tbh)
			continue;
		if (unlikely(test_set_buffer_locked(tbh)))
			BUG();
		/* The buffer dirty state is now irrelevant, just clean it. */
		clear_buffer_dirty(tbh);
		BUG_ON(!buffer_uptodate(tbh));
		BUG_ON(!buffer_mapped(tbh));
		get_bh(tbh);
		tbh->b_end_io = end_buffer_write_sync;
		submit_bh(WRITE, tbh);
	}
	/* Synchronize the mft mirror now if not @sync. */
	if (is_mft && !sync)
		goto do_mirror;
do_wait:
	/* Wait on i/o completion of buffers. */
	for (i = 0; i < nr_bhs; i++) {
		tbh = bhs[i];
		if (!tbh)
			continue;
		wait_on_buffer(tbh);
		if (unlikely(!buffer_uptodate(tbh))) {
			ntfs_error(vol->sb, "I/O error while writing ntfs "
					"record buffer (inode 0x%lx, "
					"attribute type 0x%x, page index "
					"0x%lx, page offset 0x%lx)! Unmount "
					"and run chkdsk.", vi->i_ino, ni->type,
					page->index, bh_offset(tbh));
			if (!err || err == -ENOMEM)
				err = -EIO;
			/*
			 * Set the buffer uptodate so the page and buffer
			 * states do not become out of sync.
			 */
			set_buffer_uptodate(tbh);
		}
	}
	/* If @sync, now synchronize the mft mirror. */
	if (is_mft && sync) {
do_mirror:
		for (i = 0; i < nr_bhs; i++) {
			unsigned long mft_no;
			unsigned int ofs;

			/*
			 * Skip buffers which are not at the beginning of
			 * records.
			 */
			if (i % bhs_per_rec)
				continue;
			tbh = bhs[i];
			/* Skip removed buffers (and hence records). */
			if (!tbh)
				continue;
			ofs = bh_offset(tbh);
			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
					>> rec_size_bits;
			if (mft_no < vol->mftmirr_size)
				ntfs_sync_mft_mirror(vol, mft_no,
						(MFT_RECORD*)(kaddr + ofs),
						sync);
		}
		if (!sync)
			goto do_wait;
	}
	/* Remove the mst protection fixups again. */
	for (i = 0; i < nr_bhs; i++) {
		if (!(i % bhs_per_rec)) {
			tbh = bhs[i];
			if (!tbh)
				continue;
			post_write_mst_fixup((NTFS_RECORD*)(kaddr +
					bh_offset(tbh)));
		}
	}
	flush_dcache_page(page);
unm_done:
	/* Unlock any locked inodes. */
	while (nr_locked_nis-- > 0) {
		ntfs_inode *tni, *base_tni;

		tni = locked_nis[nr_locked_nis];
		/* Get the base inode. */
		down(&tni->extent_lock);
		if (tni->nr_extents >= 0)
			base_tni = tni;
		else {
			base_tni = tni->ext.base_ntfs_ino;
			BUG_ON(!base_tni);
		}
		up(&tni->extent_lock);
		ntfs_debug("Unlocking %s inode 0x%lx.",
				tni == base_tni ? "base" : "extent",
				tni->mft_no);
		up(&tni->mrec_lock);
		atomic_dec(&tni->count);
		iput(VFS_I(base_tni));
	}
	SetPageUptodate(page);
	kunmap(page);
done:
	if (unlikely(err && err != -ENOMEM)) {
		/*
		 * Set page error if there is only one ntfs record in the page.
		 * Otherwise we would lose per-record granularity.
		 */
		if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
			SetPageError(page);
		NVolSetErrors(vol);
	}
	if (page_is_dirty) {
		ntfs_debug("Page still contains one or more dirty ntfs "
				"records. Redirtying the page starting at "
				"record 0x%lx.", page->index <<
				(PAGE_CACHE_SHIFT - rec_size_bits));
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
	} else {
		/*
		 * Keep the VM happy. This must be done otherwise the
		 * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
		 * the page is clean.
		 */
		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
	}
	if (likely(!err))
		ntfs_debug("Done.");
	return err;
}
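
/*
 * Illustrative sketch, not part of the driver: how the bhs[] array above is
 * grouped into records. With bhs_per_rec buffers per ntfs record, index i
 * starts a record exactly when i % bhs_per_rec == 0, and dropping a record
 * from the write list means NULLing its buffers up to the next record
 * boundary. The helper is hypothetical and compiled out.
 */
#if 0	/* example only */
static void example_drop_record(struct buffer_head **bhs, int i,
		int bhs_per_rec)
{
	/* Assumes @i is a record start, i.e. i % bhs_per_rec == 0. */
	do {
		bhs[i] = NULL;
	} while (++i % bhs_per_rec);
}
#endif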

/**
 * ntfs_writepage - write a @page to the backing store
 * @page:	page cache page to write out
 * @wbc:	writeback control structure
 *
 * This is called from the VM when it wants to have a dirty ntfs page cache
 * page cleaned. The VM has already locked the page and marked it clean.
 *
 * For non-resident attributes, ntfs_writepage() writes the @page by calling
 * the ntfs version of the generic block_write_full_page() function,
 * ntfs_write_block(), which in turn if necessary creates and writes the
 * buffers associated with the page asynchronously.
 *
 * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
 * the data to the mft record (which at this stage is most likely in memory).
 * The mft record is then marked dirty and written out asynchronously via the
 * vfs inode dirty code path for the inode the mft record belongs to or via the
 * vm page dirty code path for the page the mft record is in.
 *
 * Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
{
	loff_t i_size;
	struct inode *vi;
	ntfs_inode *ni, *base_ni;
	char *kaddr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *m;
	u32 attr_len;
	int err;

	BUG_ON(!PageLocked(page));

	vi = page->mapping->host;
	i_size = i_size_read(vi);

	/* Is the page fully outside i_size? (truncate in progress) */
	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
			PAGE_CACHE_SHIFT)) {
		/*
		 * The page may have dirty, unmapped buffers. Make them
		 * freeable here, so the page does not leak.
		 */
		block_invalidatepage(page, 0);
		unlock_page(page);
		ntfs_debug("Write outside i_size - truncated?");
		return 0;
	}
	ni = NTFS_I(vi);

	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/*
		 * Only unnamed $DATA attributes can be compressed, encrypted,
		 * and/or sparse.
		 */
		if (ni->type == AT_DATA && !ni->name_len) {
			/* If file is encrypted, deny access, just like NT4. */
			if (NInoEncrypted(ni)) {
				unlock_page(page);
				ntfs_debug("Denying write access to encrypted "
						"file.");
				return -EACCES;
			}
			/* Compressed data streams are handled in compress.c. */
			if (NInoCompressed(ni)) {
				// TODO: Implement and replace this check with
				// return ntfs_write_compressed_block(page);
				unlock_page(page);
				ntfs_error(vi->i_sb, "Writing to compressed "
						"files is not supported yet. "
						"Sorry.");
				return -EOPNOTSUPP;
			}
			// TODO: Implement and remove this check.
			if (NInoSparse(ni)) {
				unlock_page(page);
				ntfs_error(vi->i_sb, "Writing to sparse files "
						"is not supported yet. Sorry.");
				return -EOPNOTSUPP;
			}
		}
		/* We have to zero every time due to mmap-at-end-of-file. */
		if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
			/* The page straddles i_size. */
			unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
			kaddr = kmap_atomic(page, KM_USER0);
			memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
		}
		/* Handle mst protected attributes. */
		if (NInoMstProtected(ni))
			return ntfs_write_mst_block(page, wbc);
		/* Normal data stream. */
		return ntfs_write_block(page, wbc);
	}
	/*
	 * Attribute is resident, implying it is not compressed, encrypted,
	 * sparse, or mst protected. This also means the attribute is smaller
	 * than an mft record and hence smaller than a page, so can simply
	 * return error on any pages with index above 0.
	 */
	BUG_ON(page_has_buffers(page));
	BUG_ON(!PageUptodate(page));
	if (unlikely(page->index > 0)) {
		ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
				"Aborting write.", page->index);
		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
		return -EIO;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;

	/* Map, pin, and lock the mft record. */
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		ctx = NULL;
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto err_out;
	/*
	 * Keep the VM happy. This must be done otherwise the radix-tree tag
	 * PAGECACHE_TAG_DIRTY remains set even though the page is clean.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	unlock_page(page);
	/*
	 * Here, we don't need to zero the out of bounds area every time
	 * because the below memcpy() already takes care of the
	 * mmap-at-end-of-file requirements. If the file is converted to a
	 * non-resident one, then the code path used is switched to the
	 * non-resident one where the zeroing happens on each ntfs_writepage()
	 * invocation.
	 *
	 * The above also applies nicely when i_size is decreased.
	 *
	 * When i_size is increased, the memory between the old and new i_size
	 * _must_ be zeroed (or overwritten with new data). Otherwise we will
	 * expose data to userspace/disk which should never have been exposed.
	 *
	 * FIXME: Ensure that i_size increases do the zeroing/overwriting and
	 * if we cannot guarantee that, then enable the zeroing below. If the
	 * zeroing below is enabled, we MUST move the unlock_page() from above
	 * to after the kunmap_atomic(), i.e. just before the
	 * end_page_writeback().
	 * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
	 * increases for resident attributes so those are ok.
	 * TODO: ntfs_truncate(), others?
	 */
	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	i_size = i_size_read(VFS_I(ni));
	kaddr = kmap_atomic(page, KM_USER0);
	if (unlikely(attr_len > i_size)) {
		/* Zero out of bounds area in the mft record. */
		memset((u8*)ctx->attr + le16_to_cpu(
				ctx->attr->data.resident.value_offset) +
				i_size, 0, attr_len - i_size);
		attr_len = i_size;
	}
	/* Copy the data from the page to the mft record. */
	memcpy((u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			kaddr, attr_len);
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	/* Zero out of bounds area in the page cache page. */
	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);

	end_page_writeback(page);

	/* Mark the mft record dirty, so it gets written back. */
	mark_mft_record_dirty(ctx->ntfs_ino);

	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(base_ni);
	return 0;
err_out:
	if (err == -ENOMEM) {
		ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				"page so we try again later.");
		/*
		 * Put the page back on mapping->dirty_pages, but leave its
		 * buffers' dirty state as-is.
		 */
		redirty_page_for_writepage(wbc, page);
		err = 0;
	} else {
		ntfs_error(vi->i_sb, "Resident attribute write failed with "
				"error %i. Setting page error flag.", err);
		SetPageError(page);
	}
	unlock_page(page);
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	return err;
}
/**
 * ntfs_prepare_nonresident_write - prepare a non-resident page for a write
 */
static int ntfs_prepare_nonresident_write(struct page *page,
		unsigned from, unsigned to)
{
	VCN vcn;
	LCN lcn;
	sector_t block, ablock, iblock;
	struct inode *vi;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
	unsigned int vcn_ofs, block_start, block_end, blocksize;
	int err;
	BOOL is_retry;
	unsigned char blocksize_bits;

	vi = page->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
			page->index, from, to);

	BUG_ON(!NInoNonResident(ni));

	blocksize_bits = vi->i_blkbits;
	blocksize = 1 << blocksize_bits;

	/*
	 * create_empty_buffers() will create uptodate/dirty buffers if the
	 * page is uptodate/dirty.
	 */
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	bh = head = page_buffers(page);
	if (unlikely(!bh))
		return -ENOMEM;

	/* The first block in the page. */
	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);

	/*
	 * The first out of bounds block for the allocated size. No need to
	 * round up as allocated_size is in multiples of cluster size and the
	 * minimum cluster size is 512 bytes, which is equal to the smallest
	 * blocksize.
	 */
	ablock = ni->allocated_size >> blocksize_bits;

	/* The last (fully or partially) initialized block. */
	iblock = ni->initialized_size >> blocksize_bits;

	/* Loop through all the buffers in the page. */
	block_start = 0;
	rl = NULL;
	err = 0;
	do {
		block_end = block_start + blocksize;
		/*
		 * If buffer @bh is outside the write, just mark it uptodate
		 * if the page is uptodate and continue with the next buffer.
		 */
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		/*
		 * @bh is at least partially being written to.
		 * Make sure it is not marked as new.
		 */
		//if (buffer_new(bh))
		//	clear_buffer_new(bh);

		if (block >= ablock) {
			// TODO: block is above allocated_size, need to
			// allocate it. Best done in one go to accommodate not
			// only block but all above blocks up to and including:
			// ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
			// - 1) >> blocksize_bits. Obviously will need to round
			// up to the next cluster boundary, too. This should be
			// done with a helper function, so it can be reused.
			// A sketch of this computation follows this if block.
			ntfs_error(vol->sb, "Writing beyond allocated size "
					"is not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			goto err_out;
			// Need to update ablock.
			// Need to set_buffer_new() on all block bhs that are
			// newly allocated.
		}
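		/*
		 * Hedged sketch of the range computation the TODO above
		 * describes; this is an assumption, not driver code, and it
		 * is disabled. It derives the first block past the write,
		 * rounded up to the next cluster boundary, i.e. the end of
		 * the range that would need allocating.
		 */
#if 0
		{
			sector_t end_block;

			/* First block beyond the end of the write. */
			end_block = (((s64)page->index << PAGE_CACHE_SHIFT) +
					to + blocksize - 1) >> blocksize_bits;
			/* Round up to the next cluster boundary, as clusters
			 * are the allocation granularity. */
			end_block = ((end_block << blocksize_bits) +
					vol->cluster_size - 1) >>
					vol->cluster_size_bits <<
					(vol->cluster_size_bits -
					blocksize_bits);
			/* Clusters covering blocks [ablock, end_block) would
			 * then be allocated and ablock updated. */
		}
#endif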
		/*
		 * Now we have enough allocated size to fulfill the whole
		 * request, i.e. block < ablock is true.
		 */
		if (unlikely((block >= iblock) &&
				(ni->initialized_size < vi->i_size))) {
			/*
			 * If this page is fully outside initialized size, zero
			 * out all pages between the current initialized size
			 * and the current page. Just use ntfs_readpage() to do
			 * the zeroing transparently.
			 */
			if (block > iblock) {
				// TODO:
				// For each page do:
				// - read_cache_page()
				// Again for each page do:
				// - wait_on_page_locked()
				// - Check (PageUptodate(page) &&
				//   !PageError(page))
				// Update initialized size in the attribute and
				// in the inode.
				// Again, for each page do:
				// __set_page_dirty_buffers();
				// page_cache_release()
				// We don't need to wait on the writes.
				// Update iblock.
				// A sketch of this pass follows this if block.
			}
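			/*
			 * Hedged sketch of the zeroing pass the TODO above
			 * outlines; an assumption, not driver code, and
			 * disabled. For brevity it issues and waits on the
			 * reads one page at a time instead of batching them
			 * as the TODO suggests, and it omits the initialized
			 * size updates.
			 */
#if 0
			{
				struct address_space *mapping = vi->i_mapping;
				unsigned long idx;

				for (idx = ni->initialized_size >>
						PAGE_CACHE_SHIFT;
						idx < page->index; idx++) {
					/* ntfs_readpage() zeroes beyond the
					 * initialized size transparently. */
					struct page *zp = read_cache_page(
							mapping, idx,
							(filler_t*)mapping->
							a_ops->readpage, NULL);
					if (IS_ERR(zp)) {
						err = PTR_ERR(zp);
						goto err_out;
					}
					wait_on_page_locked(zp);
					if (!PageUptodate(zp) ||
							PageError(zp)) {
						page_cache_release(zp);
						err = -EIO;
						goto err_out;
					}
					/* Dirty the zeroed page so it gets
					 * written out later. */
					__set_page_dirty_buffers(zp);
					page_cache_release(zp);
				}
			}
#endif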
			/*
			 * The current page straddles initialized size. Zero
			 * all non-uptodate buffers and set them uptodate (and
			 * dirty?). Note, there aren't any non-uptodate buffers
			 * if the page is uptodate.
			 * FIXME: For an uptodate page, the buffers may need to
			 * be written out because they were not initialized on
			 * disk before.
			 */
			if (!PageUptodate(page)) {
				// TODO:
				// Zero any non-uptodate buffers up to i_size.
				// Set them uptodate and dirty.
				// A sketch of this follows this if block.
			}
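			/*
			 * Hedged sketch of the buffer zeroing the TODO above
			 * calls for; an assumption, not driver code, and it
			 * glosses over clamping the zeroing to i_size.
			 */
#if 0
			if (!PageUptodate(page)) {
				u8 *kaddr = kmap_atomic(page, KM_USER0);
				struct buffer_head *zbh = head;
				unsigned int zofs = 0;

				do {
					if (!buffer_uptodate(zbh)) {
						memset(kaddr + zofs, 0,
								blocksize);
						set_buffer_uptodate(zbh);
						set_buffer_dirty(zbh);
					}
					zofs += blocksize;
				} while ((zbh = zbh->b_this_page) != head);
				flush_dcache_page(page);
				kunmap_atomic(kaddr, KM_USER0);
			}
#endif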
			// TODO:
			// Update initialized size in the attribute and in the
			// inode (up to i_size).
			// Update iblock.
			// FIXME: This is inefficient. Try to batch the two
			// size changes to happen in one go.
			ntfs_error(vol->sb, "Writing beyond initialized size "
					"is not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			goto err_out;
			// Do NOT set_buffer_new() BUT DO clear buffer range
			// outside write request range.
			// set_buffer_uptodate() on complete buffers as well as
			// set_buffer_dirty().
		}

		/* Need to map unmapped buffers. */
		if (!buffer_mapped(bh)) {
			/* Unmapped buffer. Need to map it. */
			bh->b_bdev = vol->sb->s_bdev;

			/* Convert block into corresponding vcn and offset. */
			vcn = (VCN)block << blocksize_bits >>
					vol->cluster_size_bits;
			vcn_ofs = ((VCN)block << blocksize_bits) &
					vol->cluster_size_mask;

			is_retry = FALSE;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			if (unlikely(lcn < 0)) {
				/*
				 * We extended the attribute allocation above.
				 * If we hit an ENOENT here it means that the
				 * allocation was insufficient which is a bug.
				 */
				BUG_ON(lcn == LCN_ENOENT);

				/* It is a hole, need to instantiate it. */
				if (lcn == LCN_HOLE) {
					// TODO: Instantiate the hole.
					// clear_buffer_new(bh);
					// unmap_underlying_metadata(bh->b_bdev,
					//		bh->b_blocknr);
					// For non-uptodate buffers, need to
					// zero out the region outside the
					// request in this bh or all bhs,
					// depending on what we implemented
					// above.
					// Need to flush_dcache_page().
					// Or could use set_buffer_new()
					// instead? A disabled sketch of that
					// follows.
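					/*
					 * Hedged sketch of the buffer fixups
					 * the TODO above lists, assuming @lcn
					 * had been replaced by a freshly
					 * allocated cluster (the allocation
					 * itself is omitted); an assumption,
					 * not driver code.
					 */
#if 0
					bh->b_blocknr = ((lcn <<
							vol->cluster_size_bits)
							+ vcn_ofs) >>
							blocksize_bits;
					set_buffer_mapped(bh);
					set_buffer_new(bh);
					/* The BH_New handling below then does
					 * the zeroing and the metadata
					 * unmapping. */
#endif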
					ntfs_error(vol->sb, "Writing into "
							"sparse regions is "
							"not supported yet. "
							"Sorry.");
					err = -EOPNOTSUPP;
					goto err_out;
				} else if (!is_retry &&
						lcn == LCN_RL_NOT_MAPPED) {
					is_retry = TRUE;
					/*
					 * Attempt to map runlist, dropping
					 * lock for the duration.
					 */
					up_read(&ni->runlist.lock);
					err = ntfs_map_runlist(ni, vcn);
					if (likely(!err))
						goto lock_retry_remap;
					rl = NULL;
					lcn = err;
				}
				/*
				 * Failed to map the buffer, even after
				 * retrying.
				 */
				bh->b_blocknr = -1;
				ntfs_error(vol->sb, "Failed to write to inode "
						"0x%lx, attribute type 0x%x, "
						"vcn 0x%llx, offset 0x%x "
						"because its location on disk "
						"could not be determined%s "
						"(error code %lli).",
						ni->mft_no, ni->type,
						(unsigned long long)vcn,
						vcn_ofs, is_retry ? " even "
						"after retrying" : "",
						(long long)lcn);
				if (!err)
					err = -EIO;
				goto err_out;
			}
			/* We now have a successful remap, i.e. lcn >= 0. */

			/* Setup buffer head to correct block. */
			bh->b_blocknr = ((lcn << vol->cluster_size_bits)
					+ vcn_ofs) >> blocksize_bits;
			set_buffer_mapped(bh);

			// FIXME: Something analogous to this is needed for
			// each newly allocated block, i.e. BH_New.
			// FIXME: Might need to take this out of the
			// if (!buffer_mapped(bh)) {}, depending on how we
			// implement things during the allocated_size and
			// initialized_size extension code above.
			if (buffer_new(bh)) {
				clear_buffer_new(bh);
				unmap_underlying_metadata(bh->b_bdev,
						bh->b_blocknr);
				if (PageUptodate(page)) {
					set_buffer_uptodate(bh);
					continue;
				}
				/*
				 * Page is _not_ uptodate, zero surrounding
				 * region. NOTE: This is how we decide if to
				 * zero or not!
				 */
				if (block_end > to || block_start < from) {
					void *kaddr;

					kaddr = kmap_atomic(page, KM_USER0);
					if (block_end > to)
						memset(kaddr + to, 0,
								block_end - to);
					if (block_start < from)
						memset(kaddr + block_start, 0,
								from -
								block_start);
					flush_dcache_page(page);
					kunmap_atomic(kaddr, KM_USER0);
				}
				continue;
			}
		}
		/* @bh is mapped, set it uptodate if the page is uptodate. */
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		/*
		 * The page is not uptodate. The buffer is mapped. If it is not
		 * uptodate, and it is only partially being written to, we need
		 * to read the buffer in before the write, i.e. right now.
		 */
		if (!buffer_uptodate(bh) &&
				(block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++ = bh;
		}
	} while (block++, block_start = block_end,
			(bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl) {
		up_read(&ni->runlist.lock);
		rl = NULL;
	}

	/* If we issued read requests, let them complete. */
	while (wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			return -EIO;
	}

	ntfs_debug("Done.");
	return 0;
err_out:
	/*
	 * Zero out any newly allocated blocks to avoid exposing stale data.
	 * If BH_New is set, we know that the block was newly allocated in the
	 * above loop.
	 * FIXME: What about initialized_size increments? Have we done all the
	 * required zeroing above? If not this error handling is broken, and
	 * in particular the if (block_end <= from) check is completely bogus.
	 */
	bh = head;
	block_start = 0;
	is_retry = FALSE;
	do {
		block_end = block_start + blocksize;
		if (block_end <= from)
			continue;
		if (block_start >= to)
			break;
		if (buffer_new(bh)) {
			void *kaddr;

			clear_buffer_new(bh);
			kaddr = kmap_atomic(page, KM_USER0);
			memset(kaddr + block_start, 0, bh->b_size);
			kunmap_atomic(kaddr, KM_USER0);
			set_buffer_uptodate(bh);
			mark_buffer_dirty(bh);
			is_retry = TRUE;
		}
	} while (block_start = block_end, (bh = bh->b_this_page) != head);
	if (is_retry)
		flush_dcache_page(page);
	if (rl)
		up_read(&ni->runlist.lock);
	return err;
}

/**
 * ntfs_prepare_write - prepare a page for receiving data
 *
 * This is called from generic_file_write() with i_sem held on the inode
 * (@page->mapping->host). The @page is locked but not kmap()ped. The source
 * data has not yet been copied into the @page.
 *
 * Need to extend the attribute/fill in holes if necessary, create blocks and
 * make partially overwritten blocks uptodate.
 *
 * i_size is not to be modified yet.
 *
 * Return 0 on success or -errno on error.
 *
 * Should be using block_prepare_write() [support for sparse files] or
 * cont_prepare_write() [no support for sparse files]. Cannot do that due to
 * ntfs specifics but can look at them for implementation guidance.
 *
 * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
 * the first byte in the page that will be written to and @to is the first byte
 * after the last byte that will be written to.
 */
static int ntfs_prepare_write(struct file *file, struct page *page,
		unsigned from, unsigned to)
{
	s64 new_size;
	struct inode *vi = page->mapping->host;
	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
	ntfs_volume *vol = ni->vol;
	ntfs_attr_search_ctx *ctx = NULL;
	MFT_RECORD *m = NULL;
	ATTR_RECORD *a;
	u8 *kaddr;
	u32 attr_len;
	int err;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
			page->index, from, to);
	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_CACHE_SIZE);
	BUG_ON(to > PAGE_CACHE_SIZE);
	BUG_ON(from > to);
	BUG_ON(NInoMstProtected(ni));
	/*
	 * If a previous ntfs_truncate() failed, repeat it and abort if it
	 * fails again.
	 */
	if (unlikely(NInoTruncateFailed(ni))) {
		down_write(&vi->i_alloc_sem);
		err = ntfs_truncate(vi);
		up_write(&vi->i_alloc_sem);
		if (err || NInoTruncateFailed(ni)) {
			if (!err)
				err = -EIO;
			goto err_out;
		}
	}
	/* If the attribute is not resident, deal with it elsewhere. */
	if (NInoNonResident(ni)) {
		/*
		 * Only unnamed $DATA attributes can be compressed, encrypted,
		 * and/or sparse.
		 */
		if (ni->type == AT_DATA && !ni->name_len) {
			/* If file is encrypted, deny access, just like NT4. */
			if (NInoEncrypted(ni)) {
				ntfs_debug("Denying write access to encrypted "
						"file.");
				return -EACCES;
			}
			/* Compressed data streams are handled in compress.c. */
			if (NInoCompressed(ni)) {
				// TODO: Implement and replace this check with
				// return ntfs_write_compressed_block(page);
				ntfs_error(vi->i_sb, "Writing to compressed "
						"files is not supported yet. "
						"Sorry.");
				return -EOPNOTSUPP;
			}
			// TODO: Implement and remove this check.
			if (NInoSparse(ni)) {
				ntfs_error(vi->i_sb, "Writing to sparse files "
						"is not supported yet. Sorry.");
				return -EOPNOTSUPP;
			}
		}
		/* Normal data stream. */
		return ntfs_prepare_nonresident_write(page, from, to);
	}
	/*
	 * Attribute is resident, implying it is not compressed, encrypted, or
	 * sparse.
	 */
	BUG_ON(page_has_buffers(page));
	new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
	/* If we do not need to resize the attribute allocation we are done. */
	if (new_size <= vi->i_size)
		goto done;

	// FIXME: We abort for now as this code is not safe.
	ntfs_error(vi->i_sb, "Changing the file size is not supported yet. "
			"Sorry.");
	return -EOPNOTSUPP;

	/* Map, pin, and lock the (base) mft record. */
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		ctx = NULL;
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err)) {
		if (err == -ENOENT)
			err = -EIO;
		goto err_out;
	}
	m = ctx->mrec;
	a = ctx->attr;
	/* The total length of the attribute value. */
	attr_len = le32_to_cpu(a->data.resident.value_length);
	BUG_ON(vi->i_size != attr_len);
	/* Check if new size is allowed in $AttrDef. */
	err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
	if (unlikely(err)) {
		if (err == -ERANGE) {
			ntfs_error(vol->sb, "Write would cause the inode "
					"0x%lx to exceed the maximum size for "
					"its attribute type (0x%x). Aborting "
					"write.", vi->i_ino,
					le32_to_cpu(ni->type));
		} else {
			ntfs_error(vol->sb, "Inode 0x%lx has unknown "
					"attribute type 0x%x. Aborting "
					"write.", vi->i_ino,
					le32_to_cpu(ni->type));
			err = -EIO;
		}
		goto err_out2;
	}
	/*
	 * Extend the attribute record to be able to store the new attribute
	 * size.
	 */
	if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a,
			le16_to_cpu(a->data.resident.value_offset) +
			new_size)) {
		/* Not enough space in the mft record. */
		ntfs_error(vol->sb, "Not enough space in the mft record for "
				"the resized attribute value. This is not "
				"supported yet. Aborting write.");
		err = -EOPNOTSUPP;
		goto err_out2;
	}
	/*
	 * We have enough space in the mft record to fit the write. This
	 * implies the attribute is smaller than the mft record and hence the
	 * attribute must be in a single page and hence page->index must be 0.
	 */
	BUG_ON(page->index);
	/*
	 * If the beginning of the write is past the old size, enlarge the
	 * attribute value up to the beginning of the write and fill it with
	 * zeroes.
	 */
	if (from > attr_len) {
		memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
				attr_len, 0, from - attr_len);
		a->data.resident.value_length = cpu_to_le32(from);
		/* Zero the corresponding area in the page as well. */
		if (PageUptodate(page)) {
			kaddr = kmap_atomic(page, KM_USER0);
			memset(kaddr + attr_len, 0, from - attr_len);
			kunmap_atomic(kaddr, KM_USER0);
			flush_dcache_page(page);
		}
	}
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(base_ni);
	/*
	 * Because resident attributes are handled by memcpy() to/from the
	 * corresponding MFT record, and because this form of i/o is byte
	 * aligned rather than block aligned, there is no need to bring the
	 * page uptodate here as in the non-resident case where we need to
	 * bring the buffers straddled by the write uptodate before
	 * generic_file_write() does the copying from userspace.
	 *
	 * We thus defer the uptodate bringing of the page region outside the
	 * region written to to ntfs_commit_write(), which makes the code
	 * simpler and saves one atomic kmap which is good.
	 */
done:
	ntfs_debug("Done.");
	return 0;
err_out:
	if (err == -ENOMEM)
		ntfs_warning(vi->i_sb, "Error allocating memory required to "
				"prepare the write.");
	else {
		ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
				"with error %i.", err);
		NVolSetErrors(vol);
		make_bad_inode(vi);
	}
err_out2:
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	return err;
}

/**
 * ntfs_commit_nonresident_write - commit a write to a non-resident page
 */
static int ntfs_commit_nonresident_write(struct page *page,
		unsigned from, unsigned to)
{
	s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
	struct inode *vi = page->mapping->host;
	struct buffer_head *bh, *head;
	unsigned int block_start, block_end, blocksize;
	BOOL partial;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx, from = %u, to = %u.", vi->i_ino,
			NTFS_I(vi)->type, page->index, from, to);
	blocksize = 1 << vi->i_blkbits;

	// FIXME: We need a whole slew of special cases in here for compressed
	// files for example...
	// For now, we know ntfs_prepare_write() would have failed in all the
	// cases which we would have to special case, so we cannot get here in
	// those cases and this is effectively a ripped-off, unrolled
	// generic_commit_write().

	bh = head = page_buffers(page);
	block_start = 0;
	partial = FALSE;
	do {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (!buffer_uptodate(bh))
				partial = TRUE;
		} else {
			set_buffer_uptodate(bh);
			mark_buffer_dirty(bh);
		}
	} while (block_start = block_end, (bh = bh->b_this_page) != head);

	/*
	 * If this is a partial write which happened to make all buffers
	 * uptodate then we can optimize away a bogus ->readpage() for the next
	 * read(). Here we 'discover' whether the page went uptodate as a
	 * result of this (potentially partial) write.
	 */
	if (!partial)
		SetPageUptodate(page);

	/*
	 * Not convinced about this at all. See the disparity comment above.
	 * For now we know ntfs_prepare_write() would have failed in the write
	 * exceeds i_size case, so this will never trigger which is fine.
	 */
	if (pos > vi->i_size) {
		ntfs_error(vi->i_sb, "Writing beyond the existing file size "
				"is not supported yet. Sorry.");
		return -EOPNOTSUPP;
		// vi->i_size = pos;
		// mark_inode_dirty(vi);
	}
	ntfs_debug("Done.");
	return 0;
}

/**
 * ntfs_commit_write - commit the received data
 *
 * This is called from generic_file_write() with i_sem held on the inode
 * (@page->mapping->host). The @page is locked but not kmap()ped. The source
 * data has already been copied into the @page. ntfs_prepare_write() has been
 * called before the data was copied and it returned success so we can take
 * the results of various BUG checks and some error handling for granted.
 *
 * Need to mark modified blocks dirty so they get written out later when
 * ntfs_writepage() is invoked by the VM.
 *
 * Return 0 on success or -errno on error.
 *
 * Should be using generic_commit_write(). This marks buffers uptodate and
 * dirty, sets the page uptodate if all buffers in the page are uptodate, and
 * updates i_size if the end of io is beyond i_size. In that case, it also
 * marks the inode dirty.
 *
 * Cannot use generic_commit_write() due to ntfs specifics but can look at
 * it for implementation guidance.
 *
 * If things have gone as outlined in ntfs_prepare_write(), then we do not
 * need to do any page content modifications here at all, except in the write
 * to resident attribute case, where we need to do the uptodate bringing here
 * which we combine with the copying into the mft record which means we save
 * one atomic kmap.
 */
static int ntfs_commit_write(struct file *file, struct page *page,
		unsigned from, unsigned to)
{
	struct inode *vi = page->mapping->host;
	ntfs_inode *base_ni, *ni = NTFS_I(vi);
	char *kaddr, *kattr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	u32 attr_len;
	int err;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
			page->index, from, to);
	/* If the attribute is not resident, deal with it elsewhere. */
	if (NInoNonResident(ni)) {
		/* Only unnamed $DATA attributes can be compressed/encrypted. */
		if (ni->type == AT_DATA && !ni->name_len) {
			/* Encrypted files need separate handling. */
			if (NInoEncrypted(ni)) {
				// We never get here at present!
				BUG();
			}
			/* Compressed data streams are handled in compress.c. */
			if (NInoCompressed(ni)) {
				// TODO: Implement this!
				// return ntfs_write_compressed_block(page);
				// We never get here at present!
				BUG();
			}
		}
		/* Normal data stream. */
		return ntfs_commit_nonresident_write(page, from, to);
	}
	/*
	 * Attribute is resident, implying it is not compressed, encrypted, or
	 * sparse.
	 */
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		ctx = NULL;
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err)) {
		if (err == -ENOENT)
			err = -EIO;
		goto err_out;
	}
	a = ctx->attr;
	/* The total length of the attribute value. */
	attr_len = le32_to_cpu(a->data.resident.value_length);
	BUG_ON(from > attr_len);
	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
	kaddr = kmap_atomic(page, KM_USER0);
	/* Copy the received data from the page to the mft record. */
	memcpy(kattr + from, kaddr + from, to - from);
	/* Update the attribute length if necessary. */
	if (to > attr_len) {
		attr_len = to;
		a->data.resident.value_length = cpu_to_le32(attr_len);
	}
	/*
	 * If the page is not uptodate, bring the out of bounds area(s)
	 * uptodate by copying data from the mft record to the page.
	 */
	if (!PageUptodate(page)) {
		if (from > 0)
			memcpy(kaddr, kattr, from);
		if (to < attr_len)
			memcpy(kaddr + to, kattr + to, attr_len - to);
		/* Zero the region outside the end of the attribute value. */
		if (attr_len < PAGE_CACHE_SIZE)
			memset(kaddr + attr_len, 0,
					PAGE_CACHE_SIZE - attr_len);
		/*
		 * The probability of not having done any of the above is
		 * extremely small, so we just flush unconditionally.
		 */
		flush_dcache_page(page);
		SetPageUptodate(page);
	}
	kunmap_atomic(kaddr, KM_USER0);
	/* Update i_size if necessary. */
	if (vi->i_size < attr_len) {
		ni->allocated_size = ni->initialized_size = attr_len;
		i_size_write(vi, attr_len);
	}
	/* Mark the mft record dirty, so it gets written back. */
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(base_ni);
	ntfs_debug("Done.");
	return 0;
err_out:
	if (err == -ENOMEM) {
		ntfs_warning(vi->i_sb, "Error allocating memory required to "
				"commit the write.");
		if (PageUptodate(page)) {
			ntfs_warning(vi->i_sb, "Page is uptodate, setting "
					"dirty so the write will be retried "
					"later on by the VM.");
			/*
			 * Put the page on mapping->dirty_pages, but leave its
			 * buffers' dirty state as-is.
			 */
			__set_page_dirty_nobuffers(page);
			err = 0;
		} else
			ntfs_error(vi->i_sb, "Page is not uptodate. Written "
					"data has been lost.");
	} else {
		ntfs_error(vi->i_sb, "Resident attribute commit write failed "
				"with error %i.", err);
		NVolSetErrors(ni->vol);
		make_bad_inode(vi);
	}
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	return err;
}
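
/*
 * Hedged illustration of the sequence generic_file_write() drives through
 * the two address space operations above; this is not driver code, the
 * function name is made up, and the block is disabled.
 */
#if 0
static int example_write_page_range(struct file *file, struct page *page,
		unsigned from, unsigned to, const char __user *buf)
{
	struct address_space_operations *aops = page->mapping->a_ops;
	int err;

	/* The page is locked and i_sem is held; [from, to) is the write. */
	err = aops->prepare_write(file, page, from, to);
	if (err)
		return err;
	/* generic_file_write() copies the user data in at this point. */
	if (copy_from_user(kmap(page) + from, buf, to - from))
		err = -EFAULT;
	kunmap(page);
	if (!err)
		err = aops->commit_write(file, page, from, to);
	return err;
}
#endif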

#endif /* NTFS_RW */

/**
 * ntfs_aops - general address space operations for inodes and attributes
 */
struct address_space_operations ntfs_aops = {
	.readpage	= ntfs_readpage,	/* Fill page with data. */
	.sync_page	= block_sync_page,	/* Currently, just unplugs the
						   disk request queue. */
#ifdef NTFS_RW
	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
	.prepare_write	= ntfs_prepare_write,	/* Prepare page and buffers
						   ready to receive data. */
	.commit_write	= ntfs_commit_write,	/* Commit received data. */
#endif /* NTFS_RW */
};

/**
 * ntfs_mst_aops - general address space operations for mst protected inodes
 *		   and attributes
 */
struct address_space_operations ntfs_mst_aops = {
	.readpage	= ntfs_readpage,	/* Fill page with data. */
	.sync_page	= block_sync_page,	/* Currently, just unplugs the
						   disk request queue. */
#ifdef NTFS_RW
	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
	.set_page_dirty	= __set_page_dirty_nobuffers,	/* Set the page dirty
						   without touching the buffers
						   belonging to the page. */
#endif /* NTFS_RW */
};

#ifdef NTFS_RW

/**
 * mark_ntfs_record_dirty - mark an ntfs record dirty
 * @page:	page containing the ntfs record to mark dirty
 * @ofs:	byte offset within @page at which the ntfs record begins
 *
 * Set the buffers and the page in which the ntfs record is located dirty.
 *
 * The latter also marks the vfs inode the ntfs record belongs to dirty
 * (I_DIRTY_PAGES only).
 *
 * If the page does not have buffers, we create them and set them uptodate.
 * The page may not be locked which is why we need to handle the buffers under
 * the mapping->private_lock. Once the buffers are marked dirty we no longer
 * need the lock since try_to_free_buffers() does not free dirty buffers.
 */
void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs)
{
	struct address_space *mapping = page->mapping;
	ntfs_inode *ni = NTFS_I(mapping->host);
	struct buffer_head *bh, *head, *buffers_to_free = NULL;
	unsigned int end, bh_size, bh_ofs;

	BUG_ON(!PageUptodate(page));
	end = ofs + ni->itype.index.block_size;
	bh_size = 1 << VFS_I(ni)->i_blkbits;
	spin_lock(&mapping->private_lock);
	if (unlikely(!page_has_buffers(page))) {
		spin_unlock(&mapping->private_lock);
		bh = head = alloc_page_buffers(page, bh_size, 1);
		spin_lock(&mapping->private_lock);
		if (likely(!page_has_buffers(page))) {
			struct buffer_head *tail;

			do {
				set_buffer_uptodate(bh);
				tail = bh;
				bh = bh->b_this_page;
			} while (bh);
			tail->b_this_page = head;
			attach_page_buffers(page, head);
		} else
			buffers_to_free = bh;
	}
	bh = head = page_buffers(page);
	do {
		bh_ofs = bh_offset(bh);
		if (bh_ofs + bh_size <= ofs)
			continue;
		if (unlikely(bh_ofs >= end))
			break;
		set_buffer_dirty(bh);
	} while ((bh = bh->b_this_page) != head);
	spin_unlock(&mapping->private_lock);
	__set_page_dirty_nobuffers(page);
	if (unlikely(buffers_to_free)) {
		do {
			bh = buffers_to_free->b_this_page;
			free_buffer_head(buffers_to_free);
			buffers_to_free = bh;
		} while (buffers_to_free);
	}
}
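
/*
 * Hedged usage sketch for mark_ntfs_record_dirty() (disabled, made up for
 * illustration): after modifying an mst protected record inside a locked,
 * uptodate page, dirty just the buffers backing that record so the VM
 * writes it back via ntfs_writepage().
 */
#if 0
static void example_dirty_index_record(struct page *page, ntfs_inode *idx_ni,
		unsigned int rec_ofs)
{
	/* @rec_ofs is the byte offset of the record within @page. */
	BUG_ON(!PageUptodate(page));
	mark_ntfs_record_dirty(page, rec_ofs);
	/* The vfs inode is now dirty (I_DIRTY_PAGES) as well. */
}
#endif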

#endif /* NTFS_RW */