/*
 * Some low level IO code, and hacks for various block layer limitations
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "bset.h"
#include "debug.h"

static void bch_bi_idx_hack_endio(struct bio *bio, int error)
{
	struct bio *p = bio->bi_private;

	bio_endio(p, error);
	bio_put(bio);
}

static void bch_generic_make_request_hack(struct bio *bio)
{
	if (bio->bi_idx) {
		struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio));

		memcpy(clone->bi_io_vec,
		       bio_iovec(bio),
		       bio_segments(bio) * sizeof(struct bio_vec));

		clone->bi_sector = bio->bi_sector;
		clone->bi_bdev = bio->bi_bdev;
		clone->bi_rw = bio->bi_rw;
		clone->bi_vcnt = bio_segments(bio);
		clone->bi_size = bio->bi_size;

		clone->bi_private = bio;
		clone->bi_end_io = bch_bi_idx_hack_endio;

		bio = clone;
	}

	/*
	 * Hack, since drivers that clone bios clone up to bi_max_vecs, but our
	 * bios might have had more than that (before we split them per device
	 * limitations).
	 *
	 * To be taken out once immutable bvec stuff is in.
	 */
	bio->bi_max_vecs = bio->bi_vcnt;

	generic_make_request(bio);
}

/**
 * bch_bio_split - split a bio
 * @bio: bio to split
 * @sectors: number of sectors to split from the front of @bio
 * @gfp: gfp mask
 * @bs: bio set to allocate from
 *
 * Allocates and returns a new bio which represents @sectors from the start of
 * @bio, and updates @bio to represent the remaining sectors.
 *
 * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
 * unchanged.
 *
 * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
 * bvec boundary; it is the caller's responsibility to ensure that @bio is not
 * freed before the split.
 *
 * If bch_bio_split() is running under generic_make_request(), it's not safe to
 * allocate more than one bio from the same bio set. Therefore, if it is running
 * under generic_make_request() it masks out __GFP_WAIT when doing the
 * allocation. The caller must check for failure if there's any possibility of
 * it being called from under generic_make_request(); it is then the caller's
 * responsibility to retry from a safe context (by e.g. punting to workqueue).
 */
struct bio *bch_bio_split(struct bio *bio, int sectors,
			  gfp_t gfp, struct bio_set *bs)
{
	unsigned idx = bio->bi_idx, vcnt = 0, nbytes = sectors << 9;
	struct bio_vec *bv;
	struct bio *ret = NULL;

	BUG_ON(sectors <= 0);

	/*
	 * If we're being called from underneath generic_make_request() and we
	 * already allocated any bios from this bio set, we risk deadlock if we
	 * use the mempool. So instead, we possibly fail and let the caller punt
	 * to workqueue or somesuch and retry in a safe context.
	 */
	if (current->bio_list)
		gfp &= ~__GFP_WAIT;

	if (sectors >= bio_sectors(bio))
		return bio;

	if (bio->bi_rw & REQ_DISCARD) {
		ret = bio_alloc_bioset(gfp, 1, bs);
		if (!ret)
			return NULL;
		idx = 0;
		goto out;
	}

	bio_for_each_segment(bv, bio, idx) {
		vcnt = idx - bio->bi_idx;

		if (!nbytes) {
			ret = bio_alloc_bioset(gfp, vcnt, bs);
			if (!ret)
				return NULL;

			memcpy(ret->bi_io_vec, bio_iovec(bio),
			       sizeof(struct bio_vec) * vcnt);

			break;
		} else if (nbytes < bv->bv_len) {
			ret = bio_alloc_bioset(gfp, ++vcnt, bs);
			if (!ret)
				return NULL;

			memcpy(ret->bi_io_vec, bio_iovec(bio),
			       sizeof(struct bio_vec) * vcnt);

			ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
			bv->bv_offset += nbytes;
			bv->bv_len -= nbytes;
			break;
		}

		nbytes -= bv->bv_len;
	}
out:
	ret->bi_bdev = bio->bi_bdev;
	ret->bi_sector = bio->bi_sector;
	ret->bi_size = sectors << 9;
	ret->bi_rw = bio->bi_rw;
	ret->bi_vcnt = vcnt;
	ret->bi_max_vecs = vcnt;

	bio->bi_sector += sectors;
	bio->bi_size -= sectors << 9;
	bio->bi_idx = idx;

	if (bio_integrity(bio)) {
		if (bio_integrity_clone(ret, bio, gfp)) {
			bio_put(ret);
			return NULL;
		}

		bio_integrity_trim(ret, 0, bio_sectors(ret));
		bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
	}

	return ret;
}

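/*
 * Note (illustrative, not part of the original source): a caller that may be
 * running under generic_make_request() must be prepared for bch_bio_split()
 * to return NULL and must retry from a safe context, e.g.:
 *
 *	n = bch_bio_split(bio, sectors, GFP_NOIO, bs);
 *	if (!n)
 *		continue_at(cl, retry_fn, system_wq);
 *
 * where retry_fn is a hypothetical closure function that re-attempts the
 * split. __bch_bio_submit_split() below follows exactly this pattern, using
 * itself as the retry function.
 */
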
static unsigned bch_bio_max_sectors(struct bio *bio)
{
	unsigned ret = bio_sectors(bio);
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES,
				      queue_max_segments(q));
	struct bio_vec *bv, *end = bio_iovec(bio) +
		min_t(int, bio_segments(bio), max_segments);

	if (bio->bi_rw & REQ_DISCARD)
		return min(ret, q->limits.max_discard_sectors);

	if (bio_segments(bio) > max_segments ||
	    q->merge_bvec_fn) {
		ret = 0;

		for (bv = bio_iovec(bio); bv < end; bv++) {
			struct bvec_merge_data bvm = {
				.bi_bdev	= bio->bi_bdev,
				.bi_sector	= bio->bi_sector,
				.bi_size	= ret << 9,
				.bi_rw		= bio->bi_rw,
			};

			if (q->merge_bvec_fn &&
			    q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len)
				break;

			ret += bv->bv_len >> 9;
		}
	}

	ret = min(ret, queue_max_sectors(q));

	WARN_ON(!ret);
	ret = max_t(int, ret, bio_iovec(bio)->bv_len >> 9);

	return ret;
}

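/*
 * Split submission is driven by a closure: bch_generic_make_request() stashes
 * the original bi_end_io/bi_private in a bio_split_hook,
 * __bch_bio_submit_split() peels off and submits chunks no larger than
 * bch_bio_max_sectors() (taking a closure ref per chunk, dropped in
 * bch_bio_submit_split_endio()), and bch_bio_submit_split_done() restores the
 * saved fields and completes the parent bio once every chunk has finished.
 */
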
static void bch_bio_submit_split_done(struct closure *cl)
{
	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);

	s->bio->bi_end_io = s->bi_end_io;
	s->bio->bi_private = s->bi_private;
	bio_endio(s->bio, 0);

	closure_debug_destroy(&s->cl);
	mempool_free(s, s->p->bio_split_hook);
}

static void bch_bio_submit_split_endio(struct bio *bio, int error)
{
	struct closure *cl = bio->bi_private;
	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);

	if (error)
		clear_bit(BIO_UPTODATE, &s->bio->bi_flags);

	bio_put(bio);
	closure_put(cl);
}

static void __bch_bio_submit_split(struct closure *cl)
{
	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);
	struct bio *bio = s->bio, *n;

	do {
		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
				  GFP_NOIO, s->p->bio_split);
		if (!n)
			continue_at(cl, __bch_bio_submit_split, system_wq);

		n->bi_end_io = bch_bio_submit_split_endio;
		n->bi_private = cl;

		closure_get(cl);
		bch_generic_make_request_hack(n);
	} while (n != bio);

	continue_at(cl, bch_bio_submit_split_done, NULL);
}

void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
{
	struct bio_split_hook *s;

	if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD))
		goto submit;

	if (bio_sectors(bio) <= bch_bio_max_sectors(bio))
		goto submit;

	s = mempool_alloc(p->bio_split_hook, GFP_NOIO);

	s->bio = bio;
	s->p = p;
	s->bi_end_io = bio->bi_end_io;
	s->bi_private = bio->bi_private;
	bio_get(bio);

	closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL);
	return;
submit:
	bch_generic_make_request_hack(bio);
}

/* Bios with headers */

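/*
 * A bbio carries a bkey alongside the bio, so the target device and offset
 * can be filled in from one of the key's pointers at submit time (see
 * __bch_submit_bbio() below).
 */
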
void bch_bbio_free(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	mempool_free(b, c->bio_meta);
}

struct bio *bch_bbio_alloc(struct cache_set *c)
{
	struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO);
	struct bio *bio = &b->bio;

	bio_init(bio);
	bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
	bio->bi_max_vecs = bucket_pages(c);
	bio->bi_io_vec = bio->bi_inline_vecs;

	return bio;
}

void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);

	bio->bi_sector = PTR_OFFSET(&b->key, 0);
	bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev;

	b->submit_time_us = local_clock_us();
	closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0));
}

void bch_submit_bbio(struct bio *bio, struct cache_set *c,
		     struct bkey *k, unsigned ptr)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	bch_bkey_copy_single_ptr(&b->key, k, ptr);
	__bch_submit_bbio(bio, c);
}

/* IO errors */

void bch_count_io_errors(struct cache *ca, int error, const char *m)
{
	/*
	 * The halflife of an error is:
	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
	 * (each decay step below scales the error count by 127/128, so it
	 * takes roughly 88 such steps for the count to halve).
	 */

	if (ca->set->error_decay) {
		unsigned count = atomic_inc_return(&ca->io_count);

		while (count > ca->set->error_decay) {
			unsigned errors;
			unsigned old = count;
			unsigned new = count - ca->set->error_decay;

			/*
			 * First we subtract refresh from count; each time we
			 * successfully do so, we rescale the errors once:
			 */

			count = atomic_cmpxchg(&ca->io_count, old, new);

			if (count == old) {
				count = new;

				errors = atomic_read(&ca->io_errors);
				do {
					old = errors;
					new = ((uint64_t) errors * 127) / 128;
					errors = atomic_cmpxchg(&ca->io_errors,
								old, new);
				} while (old != errors);
			}
		}
	}

	if (error) {
		char buf[BDEVNAME_SIZE];
		unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
						    &ca->io_errors);
		errors >>= IO_ERROR_SHIFT;

		if (errors < ca->set->error_limit)
			pr_err("%s: IO error on %s, recovering",
			       bdevname(ca->bdev, buf), m);
		else
			bch_cache_set_error(ca->set,
					    "%s: too many IO errors %s",
					    bdevname(ca->bdev, buf), m);
	}
}

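/*
 * In addition to counting errors, compare this IO's completion latency
 * against the cache set's congestion thresholds and adjust c->congested
 * accordingly before forwarding to bch_count_io_errors().
 */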
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
			      int error, const char *m)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct cache *ca = PTR_CACHE(c, &b->key, 0);

	unsigned threshold = bio->bi_rw & REQ_WRITE
		? c->congested_write_threshold_us
		: c->congested_read_threshold_us;

	if (threshold) {
		unsigned t = local_clock_us();

		int us = t - b->submit_time_us;
		int congested = atomic_read(&c->congested);

		if (us > (int) threshold) {
			int ms = us / 1024;
			c->congested_last_us = t;

			ms = min(ms, CONGESTED_MAX + congested);
			atomic_sub(ms, &c->congested);
		} else if (congested < 0)
			atomic_inc(&c->congested);
	}

	bch_count_io_errors(ca, error, m);
}

void bch_bbio_endio(struct cache_set *c, struct bio *bio,
		    int error, const char *m)
{
	struct closure *cl = bio->bi_private;

	bch_bbio_count_io_errors(c, bio, error, m);
	bio_put(bio);
	closure_put(cl);
}