zlib.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639
  1. /*
  2. * Copyright (C) 2008 Oracle. All rights reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public
  6. * License v2 as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public
  14. * License along with this program; if not, write to the
  15. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16. * Boston, MA 02111-1307, USA.
  17. *
  18. * Based on jffs2 zlib code:
  19. * Copyright © 2001-2007 Red Hat, Inc.
  20. * Created by David Woodhouse <dwmw2@infradead.org>
  21. */
  22. #include <linux/kernel.h>
  23. #include <linux/slab.h>
  24. #include <linux/zlib.h>
  25. #include <linux/zutil.h>
  26. #include <linux/vmalloc.h>
  27. #include <linux/init.h>
  28. #include <linux/err.h>
  29. #include <linux/sched.h>
  30. #include <linux/pagemap.h>
  31. #include <linux/bio.h>
  32. #include "compression.h"
  33. /* Plan: call deflate() with avail_in == *sourcelen,
  34. avail_out = *dstlen - 12 and flush == Z_FINISH.
  35. If it doesn't manage to finish, call it again with
  36. avail_in == 0 and avail_out set to the remaining 12
  37. bytes for it to clean up.
  38. Q: Is 12 bytes sufficient?
  39. */
  40. #define STREAM_END_SPACE 12
  41. struct workspace {
  42. z_stream inf_strm;
  43. z_stream def_strm;
  44. char *buf;
  45. struct list_head list;
  46. };
  47. static LIST_HEAD(idle_workspace);
  48. static DEFINE_SPINLOCK(workspace_lock);
  49. static unsigned long num_workspace;
  50. static atomic_t alloc_workspace = ATOMIC_INIT(0);
  51. static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
  52. /*
  53. * this finds an available zlib workspace or allocates a new one
  54. * NULL or an ERR_PTR is returned if things go bad.
  55. */
  56. static struct workspace *find_zlib_workspace(void)
  57. {
  58. struct workspace *workspace;
  59. int ret;
  60. int cpus = num_online_cpus();
  61. again:
  62. spin_lock(&workspace_lock);
  63. if (!list_empty(&idle_workspace)) {
  64. workspace = list_entry(idle_workspace.next, struct workspace,
  65. list);
  66. list_del(&workspace->list);
  67. num_workspace--;
  68. spin_unlock(&workspace_lock);
  69. return workspace;
  70. }
  71. spin_unlock(&workspace_lock);
  72. if (atomic_read(&alloc_workspace) > cpus) {
  73. DEFINE_WAIT(wait);
  74. prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
  75. if (atomic_read(&alloc_workspace) > cpus)
  76. schedule();
  77. finish_wait(&workspace_wait, &wait);
  78. goto again;
  79. }
  80. atomic_inc(&alloc_workspace);
  81. workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
  82. if (!workspace) {
  83. ret = -ENOMEM;
  84. goto fail;
  85. }
  86. workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
  87. if (!workspace->def_strm.workspace) {
  88. ret = -ENOMEM;
  89. goto fail;
  90. }
  91. workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
  92. if (!workspace->inf_strm.workspace) {
  93. ret = -ENOMEM;
  94. goto fail_inflate;
  95. }
  96. workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
  97. if (!workspace->buf) {
  98. ret = -ENOMEM;
  99. goto fail_kmalloc;
  100. }
  101. return workspace;
  102. fail_kmalloc:
  103. vfree(workspace->inf_strm.workspace);
  104. fail_inflate:
  105. vfree(workspace->def_strm.workspace);
  106. fail:
  107. kfree(workspace);
  108. atomic_dec(&alloc_workspace);
  109. wake_up(&workspace_wait);
  110. return ERR_PTR(ret);
  111. }
  112. /*
  113. * put a workspace struct back on the list or free it if we have enough
  114. * idle ones sitting around
  115. */
  116. static int free_workspace(struct workspace *workspace)
  117. {
  118. spin_lock(&workspace_lock);
  119. if (num_workspace < num_online_cpus()) {
  120. list_add_tail(&workspace->list, &idle_workspace);
  121. num_workspace++;
  122. spin_unlock(&workspace_lock);
  123. if (waitqueue_active(&workspace_wait))
  124. wake_up(&workspace_wait);
  125. return 0;
  126. }
  127. spin_unlock(&workspace_lock);
  128. vfree(workspace->def_strm.workspace);
  129. vfree(workspace->inf_strm.workspace);
  130. kfree(workspace->buf);
  131. kfree(workspace);
  132. atomic_dec(&alloc_workspace);
  133. if (waitqueue_active(&workspace_wait))
  134. wake_up(&workspace_wait);
  135. return 0;
  136. }
  137. /*
  138. * cleanup function for module exit
  139. */
  140. static void free_workspaces(void)
  141. {
  142. struct workspace *workspace;
  143. while(!list_empty(&idle_workspace)) {
  144. workspace = list_entry(idle_workspace.next, struct workspace,
  145. list);
  146. list_del(&workspace->list);
  147. vfree(workspace->def_strm.workspace);
  148. vfree(workspace->inf_strm.workspace);
  149. kfree(workspace->buf);
  150. kfree(workspace);
  151. atomic_dec(&alloc_workspace);
  152. }
  153. }
  154. /*
  155. * given an address space and start/len, compress the bytes.
  156. *
  157. * pages are allocated to hold the compressed result and stored
  158. * in 'pages'
  159. *
  160. * out_pages is used to return the number of pages allocated. There
  161. * may be pages allocated even if we return an error
  162. *
  163. * total_in is used to return the number of bytes actually read. It
  164. * may be smaller then len if we had to exit early because we
  165. * ran out of room in the pages array or because we cross the
  166. * max_out threshold.
  167. *
  168. * total_out is used to return the total number of compressed bytes
  169. *
  170. * max_out tells us the max number of bytes that we're allowed to
  171. * stuff into pages
  172. */
  173. int btrfs_zlib_compress_pages(struct address_space *mapping,
  174. u64 start, unsigned long len,
  175. struct page **pages,
  176. unsigned long nr_dest_pages,
  177. unsigned long *out_pages,
  178. unsigned long *total_in,
  179. unsigned long *total_out,
  180. unsigned long max_out)
  181. {
  182. int ret;
  183. struct workspace *workspace;
  184. char *data_in;
  185. char *cpage_out;
  186. int nr_pages = 0;
  187. struct page *in_page = NULL;
  188. struct page *out_page = NULL;
  189. int out_written = 0;
  190. int in_read = 0;
  191. unsigned long bytes_left;
  192. *out_pages = 0;
  193. *total_out = 0;
  194. *total_in = 0;
  195. workspace = find_zlib_workspace();
  196. if (!workspace)
  197. return -1;
  198. if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
  199. printk(KERN_WARNING "deflateInit failed\n");
  200. ret = -1;
  201. goto out;
  202. }
  203. workspace->def_strm.total_in = 0;
  204. workspace->def_strm.total_out = 0;
  205. in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
  206. data_in = kmap(in_page);
  207. out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
  208. cpage_out = kmap(out_page);
  209. pages[0] = out_page;
  210. nr_pages = 1;
  211. workspace->def_strm.next_in = data_in;
  212. workspace->def_strm.next_out = cpage_out;
  213. workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
  214. workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
  215. out_written = 0;
  216. in_read = 0;
  217. while (workspace->def_strm.total_in < len) {
  218. ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
  219. if (ret != Z_OK) {
  220. printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
  221. ret);
  222. zlib_deflateEnd(&workspace->def_strm);
  223. ret = -1;
  224. goto out;
  225. }
  226. /* we're making it bigger, give up */
  227. if (workspace->def_strm.total_in > 8192 &&
  228. workspace->def_strm.total_in <
  229. workspace->def_strm.total_out) {
  230. ret = -1;
  231. goto out;
  232. }
  233. /* we need another page for writing out. Test this
  234. * before the total_in so we will pull in a new page for
  235. * the stream end if required
  236. */
  237. if (workspace->def_strm.avail_out == 0) {
  238. kunmap(out_page);
  239. if (nr_pages == nr_dest_pages) {
  240. out_page = NULL;
  241. ret = -1;
  242. goto out;
  243. }
  244. out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
  245. cpage_out = kmap(out_page);
  246. pages[nr_pages] = out_page;
  247. nr_pages++;
  248. workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
  249. workspace->def_strm.next_out = cpage_out;
  250. }
  251. /* we're all done */
  252. if (workspace->def_strm.total_in >= len)
  253. break;
  254. /* we've read in a full page, get a new one */
  255. if (workspace->def_strm.avail_in == 0) {
  256. if (workspace->def_strm.total_out > max_out)
  257. break;
  258. bytes_left = len - workspace->def_strm.total_in;
  259. kunmap(in_page);
  260. page_cache_release(in_page);
  261. start += PAGE_CACHE_SIZE;
  262. in_page = find_get_page(mapping,
  263. start >> PAGE_CACHE_SHIFT);
  264. data_in = kmap(in_page);
  265. workspace->def_strm.avail_in = min(bytes_left,
  266. PAGE_CACHE_SIZE);
  267. workspace->def_strm.next_in = data_in;
  268. }
  269. }
  270. workspace->def_strm.avail_in = 0;
  271. ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
  272. zlib_deflateEnd(&workspace->def_strm);
  273. if (ret != Z_STREAM_END) {
  274. ret = -1;
  275. goto out;
  276. }
  277. if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
  278. ret = -1;
  279. goto out;
  280. }
  281. ret = 0;
  282. *total_out = workspace->def_strm.total_out;
  283. *total_in = workspace->def_strm.total_in;
  284. out:
  285. *out_pages = nr_pages;
  286. if (out_page)
  287. kunmap(out_page);
  288. if (in_page) {
  289. kunmap(in_page);
  290. page_cache_release(in_page);
  291. }
  292. free_workspace(workspace);
  293. return ret;
  294. }
  295. /*
  296. * pages_in is an array of pages with compressed data.
  297. *
  298. * disk_start is the starting logical offset of this array in the file
  299. *
  300. * bvec is a bio_vec of pages from the file that we want to decompress into
  301. *
  302. * vcnt is the count of pages in the biovec
  303. *
  304. * srclen is the number of bytes in pages_in
  305. *
  306. * The basic idea is that we have a bio that was created by readpages.
  307. * The pages in the bio are for the uncompressed data, and they may not
  308. * be contiguous. They all correspond to the range of bytes covered by
  309. * the compressed extent.
  310. */
  311. int btrfs_zlib_decompress_biovec(struct page **pages_in,
  312. u64 disk_start,
  313. struct bio_vec *bvec,
  314. int vcnt,
  315. size_t srclen)
  316. {
  317. int ret = 0;
  318. int wbits = MAX_WBITS;
  319. struct workspace *workspace;
  320. char *data_in;
  321. size_t total_out = 0;
  322. unsigned long page_bytes_left;
  323. unsigned long page_in_index = 0;
  324. unsigned long page_out_index = 0;
  325. struct page *page_out;
  326. unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
  327. PAGE_CACHE_SIZE;
  328. unsigned long buf_start;
  329. unsigned long buf_offset;
  330. unsigned long bytes;
  331. unsigned long working_bytes;
  332. unsigned long pg_offset;
  333. unsigned long start_byte;
  334. unsigned long current_buf_start;
  335. char *kaddr;
  336. workspace = find_zlib_workspace();
  337. if (!workspace)
  338. return -ENOMEM;
  339. data_in = kmap(pages_in[page_in_index]);
  340. workspace->inf_strm.next_in = data_in;
  341. workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
  342. workspace->inf_strm.total_in = 0;
  343. workspace->inf_strm.total_out = 0;
  344. workspace->inf_strm.next_out = workspace->buf;
  345. workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
  346. page_out = bvec[page_out_index].bv_page;
  347. page_bytes_left = PAGE_CACHE_SIZE;
  348. pg_offset = 0;
  349. /* If it's deflate, and it's got no preset dictionary, then
  350. we can tell zlib to skip the adler32 check. */
  351. if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
  352. ((data_in[0] & 0x0f) == Z_DEFLATED) &&
  353. !(((data_in[0]<<8) + data_in[1]) % 31)) {
  354. wbits = -((data_in[0] >> 4) + 8);
  355. workspace->inf_strm.next_in += 2;
  356. workspace->inf_strm.avail_in -= 2;
  357. }
  358. if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
  359. printk(KERN_WARNING "inflateInit failed\n");
  360. ret = -1;
  361. goto out;
  362. }
  363. while(workspace->inf_strm.total_in < srclen) {
  364. ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
  365. if (ret != Z_OK && ret != Z_STREAM_END) {
  366. break;
  367. }
  368. /*
  369. * buf start is the byte offset we're of the start of
  370. * our workspace buffer
  371. */
  372. buf_start = total_out;
  373. /* total_out is the last byte of the workspace buffer */
  374. total_out = workspace->inf_strm.total_out;
  375. working_bytes = total_out - buf_start;
  376. /*
  377. * start byte is the first byte of the page we're currently
  378. * copying into relative to the start of the compressed data.
  379. */
  380. start_byte = page_offset(page_out) - disk_start;
  381. if (working_bytes == 0) {
  382. /* we didn't make progress in this inflate
  383. * call, we're done
  384. */
  385. if (ret != Z_STREAM_END) {
  386. ret = -1;
  387. }
  388. break;
  389. }
  390. /* we haven't yet hit data corresponding to this page */
  391. if (total_out <= start_byte) {
  392. goto next;
  393. }
  394. /*
  395. * the start of the data we care about is offset into
  396. * the middle of our working buffer
  397. */
  398. if (total_out > start_byte && buf_start < start_byte) {
  399. buf_offset = start_byte - buf_start;
  400. working_bytes -= buf_offset;
  401. } else {
  402. buf_offset = 0;
  403. }
  404. current_buf_start = buf_start;
  405. /* copy bytes from the working buffer into the pages */
  406. while(working_bytes > 0) {
  407. bytes = min(PAGE_CACHE_SIZE - pg_offset,
  408. PAGE_CACHE_SIZE - buf_offset);
  409. bytes = min(bytes, working_bytes);
  410. kaddr = kmap_atomic(page_out, KM_USER0);
  411. memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
  412. bytes);
  413. kunmap_atomic(kaddr, KM_USER0);
  414. flush_dcache_page(page_out);
  415. pg_offset += bytes;
  416. page_bytes_left -= bytes;
  417. buf_offset += bytes;
  418. working_bytes -= bytes;
  419. current_buf_start += bytes;
  420. /* check if we need to pick another page */
  421. if (page_bytes_left == 0) {
  422. page_out_index++;
  423. if (page_out_index >= vcnt) {
  424. ret = 0;
  425. goto done;
  426. }
  427. page_out = bvec[page_out_index].bv_page;
  428. pg_offset = 0;
  429. page_bytes_left = PAGE_CACHE_SIZE;
  430. start_byte = page_offset(page_out) - disk_start;
  431. /*
  432. * make sure our new page is covered by this
  433. * working buffer
  434. */
  435. if (total_out <= start_byte) {
  436. goto next;
  437. }
  438. /* the next page in the biovec might not
  439. * be adjacent to the last page, but it
  440. * might still be found inside this working
  441. * buffer. bump our offset pointer
  442. */
  443. if (total_out > start_byte &&
  444. current_buf_start < start_byte) {
  445. buf_offset = start_byte - buf_start;
  446. working_bytes = total_out - start_byte;
  447. current_buf_start = buf_start +
  448. buf_offset;
  449. }
  450. }
  451. }
  452. next:
  453. workspace->inf_strm.next_out = workspace->buf;
  454. workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
  455. if (workspace->inf_strm.avail_in == 0) {
  456. unsigned long tmp;
  457. kunmap(pages_in[page_in_index]);
  458. page_in_index++;
  459. if (page_in_index >= total_pages_in) {
  460. data_in = NULL;
  461. break;
  462. }
  463. data_in = kmap(pages_in[page_in_index]);
  464. workspace->inf_strm.next_in = data_in;
  465. tmp = srclen - workspace->inf_strm.total_in;
  466. workspace->inf_strm.avail_in = min(tmp,
  467. PAGE_CACHE_SIZE);
  468. }
  469. }
  470. if (ret != Z_STREAM_END) {
  471. ret = -1;
  472. } else {
  473. ret = 0;
  474. }
  475. done:
  476. zlib_inflateEnd(&workspace->inf_strm);
  477. if (data_in)
  478. kunmap(pages_in[page_in_index]);
  479. out:
  480. free_workspace(workspace);
  481. return ret;
  482. }
  483. /*
  484. * a less complex decompression routine. Our compressed data fits in a
  485. * single page, and we want to read a single page out of it.
  486. * start_byte tells us the offset into the compressed data we're interested in
  487. */
  488. int btrfs_zlib_decompress(unsigned char *data_in,
  489. struct page *dest_page,
  490. unsigned long start_byte,
  491. size_t srclen, size_t destlen)
  492. {
  493. int ret = 0;
  494. int wbits = MAX_WBITS;
  495. struct workspace *workspace;
  496. unsigned long bytes_left = destlen;
  497. unsigned long total_out = 0;
  498. char *kaddr;
  499. if (destlen > PAGE_CACHE_SIZE)
  500. return -ENOMEM;
  501. workspace = find_zlib_workspace();
  502. if (!workspace)
  503. return -ENOMEM;
  504. workspace->inf_strm.next_in = data_in;
  505. workspace->inf_strm.avail_in = srclen;
  506. workspace->inf_strm.total_in = 0;
  507. workspace->inf_strm.next_out = workspace->buf;
  508. workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
  509. workspace->inf_strm.total_out = 0;
  510. /* If it's deflate, and it's got no preset dictionary, then
  511. we can tell zlib to skip the adler32 check. */
  512. if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
  513. ((data_in[0] & 0x0f) == Z_DEFLATED) &&
  514. !(((data_in[0]<<8) + data_in[1]) % 31)) {
  515. wbits = -((data_in[0] >> 4) + 8);
  516. workspace->inf_strm.next_in += 2;
  517. workspace->inf_strm.avail_in -= 2;
  518. }
  519. if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
  520. printk(KERN_WARNING "inflateInit failed\n");
  521. ret = -1;
  522. goto out;
  523. }
  524. while(bytes_left > 0) {
  525. unsigned long buf_start;
  526. unsigned long buf_offset;
  527. unsigned long bytes;
  528. unsigned long pg_offset = 0;
  529. ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
  530. if (ret != Z_OK && ret != Z_STREAM_END) {
  531. break;
  532. }
  533. buf_start = total_out;
  534. total_out = workspace->inf_strm.total_out;
  535. if (total_out == buf_start) {
  536. ret = -1;
  537. break;
  538. }
  539. if (total_out <= start_byte) {
  540. goto next;
  541. }
  542. if (total_out > start_byte && buf_start < start_byte) {
  543. buf_offset = start_byte - buf_start;
  544. } else {
  545. buf_offset = 0;
  546. }
  547. bytes = min(PAGE_CACHE_SIZE - pg_offset,
  548. PAGE_CACHE_SIZE - buf_offset);
  549. bytes = min(bytes, bytes_left);
  550. kaddr = kmap_atomic(dest_page, KM_USER0);
  551. memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
  552. kunmap_atomic(kaddr, KM_USER0);
  553. pg_offset += bytes;
  554. bytes_left -= bytes;
  555. next:
  556. workspace->inf_strm.next_out = workspace->buf;
  557. workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
  558. }
  559. if (ret != Z_STREAM_END && bytes_left != 0) {
  560. ret = -1;
  561. } else {
  562. ret = 0;
  563. }
  564. zlib_inflateEnd(&workspace->inf_strm);
  565. out:
  566. free_workspace(workspace);
  567. return ret;
  568. }
  569. void btrfs_zlib_exit(void)
  570. {
  571. free_workspaces();
  572. }