/* dst.h */
  /*
   * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
   * All rights reserved.
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   * GNU General Public License for more details.
   */
  15. #ifndef __DST_H
  16. #define __DST_H
  17. #include <linux/types.h>
  18. #include <linux/connector.h>
  /* Size in bytes of the fixed-length name arrays declared in this header. */
  #define DST_NAMELEN 32
  /* Name string for DST; its users are outside this header. */
  #define DST_NAME "dst"
  /*
   * Control commands (the value stored in the cmd field of struct dst_ctl).
   * Values are sequential starting at zero.
   */
  enum {
      /* Remove node with given id from storage */
      DST_DEL_NODE = 0,
      /* Add remote node with given id to the storage */
      DST_ADD_REMOTE,
      /*
       * Add local node with given id to the storage
       * to be exported and used by remote peers
       */
      DST_ADD_EXPORT,
      /* Crypto initialization command (hash/cipher used to protect the connection) */
      DST_CRYPTO,
      /* Security attributes for given connection (permissions for example) */
      DST_SECURITY,
      /* Register given node in the block layer subsystem */
      DST_START,
      /* Number of control commands defined above; not a command itself */
      DST_CMD_MAX
  };
  36. struct dst_ctl
  37. {
  38. /* Storage name */
  39. char name[DST_NAMELEN];
  40. /* Command flags */
  41. __u32 flags;
  42. /* Command itself (see above) */
  43. __u32 cmd;
  44. /* Maximum number of pages per single request in this device */
  45. __u32 max_pages;
  46. /* Stale/error transaction scanning timeout in milliseconds */
  47. __u32 trans_scan_timeout;
  48. /* Maximum number of retry sends before completing transaction as broken */
  49. __u32 trans_max_retries;
  50. /* Storage size */
  51. __u64 size;
  52. };
  53. /* Reply command carries completion status */
  54. struct dst_ctl_ack
  55. {
  56. struct cn_msg msg;
  57. int error;
  58. int unused[3];
  59. };
  /*
   * Unfortunately the socket address structure is not exported to userspace
   * and is redefined there, so a private definition is used here.
   */
  #define SADDR_MAX_DATA 128

  struct saddr {
      /* Address family, AF_xxx */
      unsigned short sa_family;
      /* Protocol address, up to SADDR_MAX_DATA bytes */
      char sa_data[SADDR_MAX_DATA];
      /* Number of bytes used in sa_data */
      unsigned short sa_data_len;
  };
  73. /* Address structure */
  74. struct dst_network_ctl
  75. {
  76. /* Socket type: datagram, stream...*/
  77. unsigned int type;
  78. /* Let me guess, is it a Jupiter diameter? */
  79. unsigned int proto;
  80. /* Peer's address */
  81. struct saddr addr;
  82. };
  83. struct dst_crypto_ctl
  84. {
  85. /* Cipher and hash names */
  86. char cipher_algo[DST_NAMELEN];
  87. char hash_algo[DST_NAMELEN];
  88. /* Key sizes. Can be zero for digest for example */
  89. unsigned int cipher_keysize, hash_keysize;
  90. /* Alignment. Calculated by the DST itself. */
  91. unsigned int crypto_attached_size;
  92. /* Number of threads to perform crypto operations */
  93. int thread_num;
  94. };
  /* Export security attributes: these bits are checked when a client connects. */
  #define DST_PERM_READ (1 << 0)
  #define DST_PERM_WRITE (1 << 1)
  98. /*
  99. * Right now it is simple model, where each remote address
  100. * is assigned to set of permissions it is allowed to perform.
  101. * In real world block device does not know anything but
  102. * reading and writing, so it should be more than enough.
  103. */
  104. struct dst_secure_user
  105. {
  106. unsigned int permissions;
  107. struct saddr addr;
  108. };
  109. /*
  110. * Export control command: device to export and network address to accept
  111. * clients to work with given device
  112. */
  113. struct dst_export_ctl
  114. {
  115. char device[DST_NAMELEN];
  116. struct dst_network_ctl ctl;
  117. };
  /*
   * Network commands (the value stored in the cmd field of struct dst_cmd).
   * Numbering starts at one.
   */
  enum {
      /* Request remote configuration */
      DST_CFG = 1,
      /* IO command */
      DST_IO,
      /* IO response */
      DST_IO_RESPONSE,
      /* Keepalive message */
      DST_PING,
      /* One past the last network command; not a command itself */
      DST_NCMD_MAX,
  };
  125. struct dst_cmd
  126. {
  127. /* Network command itself, see above */
  128. __u32 cmd;
  129. /*
  130. * Size of the attached data
  131. * (in most cases, for READ command it means how many bytes were requested)
  132. */
  133. __u32 size;
  134. /* Crypto size: number of attached bytes with digest/hmac */
  135. __u32 csize;
  136. /* Here we can carry secret data */
  137. __u32 reserved;
  138. /* Read/write bits, see how they are encoded in bio structure */
  139. __u64 rw;
  140. /* BIO flags */
  141. __u64 flags;
  142. /* Unique command id (like transaction ID) */
  143. __u64 id;
  144. /* Sector to start IO from */
  145. __u64 sector;
  146. /* Hash data is placed after this header */
  147. __u8 hash[0];
  148. };
  149. /*
  150. * Convert command to/from network byte order.
  151. * We do not use hton*() functions, since there is
  152. * no 64-bit implementation.
  153. */
  154. static inline void dst_convert_cmd(struct dst_cmd *c)
  155. {
  156. c->cmd = __cpu_to_be32(c->cmd);
  157. c->csize = __cpu_to_be32(c->csize);
  158. c->size = __cpu_to_be32(c->size);
  159. c->sector = __cpu_to_be64(c->sector);
  160. c->id = __cpu_to_be64(c->id);
  161. c->flags = __cpu_to_be64(c->flags);
  162. c->rw = __cpu_to_be64(c->rw);
  163. }
  164. /* Transaction id */
  165. typedef __u64 dst_gen_t;
  166. #ifdef __KERNEL__
  167. #include <linux/blkdev.h>
  168. #include <linux/bio.h>
  169. #include <linux/device.h>
  170. #include <linux/mempool.h>
  171. #include <linux/net.h>
  172. #include <linux/poll.h>
  173. #include <linux/rbtree.h>
  /*
   * Debug output helper.
   * With CONFIG_DST_DEBUG it expands to printk() at KERN_NOTICE level.
   * Otherwise it is an empty inline function that keeps the printf-style
   * format checking of its arguments.
   */
  #ifdef CONFIG_DST_DEBUG
  #define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
  #else
  static inline void __attribute__ ((format (printf, 1, 2)))
  dprintk(const char *fmt, ...)
  {
  }
  #endif
  180. struct dst_node;
  181. struct dst_trans
  182. {
  183. /* DST node we are working with */
  184. struct dst_node *n;
  185. /* Entry inside transaction tree */
  186. struct rb_node trans_entry;
  187. /* Merlin kills this transaction when this memory cell equals zero */
  188. atomic_t refcnt;
  189. /* How this transaction should be processed by crypto engine */
  190. short enc;
  191. /* How many times this transaction was resent */
  192. short retries;
  193. /* Completion status */
  194. int error;
  195. /* When did we send it to the remote peer */
  196. long send_time;
  197. /* My name is...
  198. * Well, computers does not speak, they have unique id instead */
  199. dst_gen_t gen;
  200. /* Block IO we are working with */
  201. struct bio *bio;
  202. /* Network command for above block IO request */
  203. struct dst_cmd cmd;
  204. };
  205. struct dst_crypto_engine
  206. {
  207. /* What should we do with all block requests */
  208. struct crypto_hash *hash;
  209. struct crypto_ablkcipher *cipher;
  210. /* Pool of pages used to encrypt data into before sending */
  211. int page_num;
  212. struct page **pages;
  213. /* What to do with current request */
  214. int enc;
  215. /* Who we are and where do we go */
  216. struct scatterlist *src, *dst;
  217. /* Maximum timeout waiting for encryption to be completed */
  218. long timeout;
  219. /* IV is a 64-bit sequential counter */
  220. u64 iv;
  221. /* Secret data */
  222. void *private;
  223. /* Cached temporary data lives here */
  224. int size;
  225. void *data;
  226. };
  227. struct dst_state
  228. {
  229. /* The main state protection */
  230. struct mutex state_lock;
  231. /* Polling machinery for sockets */
  232. wait_queue_t wait;
  233. wait_queue_head_t *whead;
  234. /* Most of events are being waited here */
  235. wait_queue_head_t thread_wait;
  236. /* Who owns this? */
  237. struct dst_node *node;
  238. /* Network address for this state */
  239. struct dst_network_ctl ctl;
  240. /* Permissions to work with: read-only or rw connection */
  241. u32 permissions;
  242. /* Called when we need to clean private data */
  243. void (* cleanup)(struct dst_state *st);
  244. /* Used by the server: BIO completion queues BIOs here */
  245. struct list_head request_list;
  246. spinlock_t request_lock;
  247. /* Guess what? No, it is not number of planets */
  248. atomic_t refcnt;
  249. /* This flags is set when connection should be dropped */
  250. int need_exit;
  251. /*
  252. * Socket to work with. Second pointer is used for
  253. * lockless check if socket was changed before performing
  254. * next action (like working with cached polling result)
  255. */
  256. struct socket *socket, *read_socket;
  257. /* Cached preallocated data */
  258. void *data;
  259. unsigned int size;
  260. /* Currently processed command */
  261. struct dst_cmd cmd;
  262. };
  263. struct dst_info
  264. {
  265. /* Device size */
  266. u64 size;
  267. /* Local device name for export devices */
  268. char local[DST_NAMELEN];
  269. /* Network setup */
  270. struct dst_network_ctl net;
  271. /* Sysfs bits use this */
  272. struct device device;
  273. };
  274. struct dst_node
  275. {
  276. struct list_head node_entry;
  277. /* Hi, my name is stored here */
  278. char name[DST_NAMELEN];
  279. /* My cache name is stored here */
  280. char cache_name[DST_NAMELEN];
  281. /* Block device attached to given node.
  282. * Only valid for exporting nodes */
  283. struct block_device *bdev;
  284. /* Network state machine for given peer */
  285. struct dst_state *state;
  286. /* Block IO machinery */
  287. struct request_queue *queue;
  288. struct gendisk *disk;
  289. /* Number of threads in processing pool */
  290. int thread_num;
  291. /* Maximum number of pages in single IO */
  292. int max_pages;
  293. /* I'm that big in bytes */
  294. loff_t size;
  295. /* Exported to userspace node information */
  296. struct dst_info *info;
  297. /*
  298. * Security attribute list.
  299. * Used only by exporting node currently.
  300. */
  301. struct list_head security_list;
  302. struct mutex security_lock;
  303. /*
  304. * When this unerflows below zero, university collapses.
  305. * But this will not happen, since node will be freed,
  306. * when reference counter reaches zero.
  307. */
  308. atomic_t refcnt;
  309. /* How precisely should I be started? */
  310. int (*start)(struct dst_node *);
  311. /* Crypto capabilities */
  312. struct dst_crypto_ctl crypto;
  313. u8 *hash_key;
  314. u8 *cipher_key;
  315. /* Pool of processing thread */
  316. struct thread_pool *pool;
  317. /* Transaction IDs live here */
  318. atomic_long_t gen;
  319. /*
  320. * How frequently and how many times transaction
  321. * tree should be scanned to drop stale objects.
  322. */
  323. long trans_scan_timeout;
  324. int trans_max_retries;
  325. /* Small gnomes live here */
  326. struct rb_root trans_root;
  327. struct mutex trans_lock;
  328. /*
  329. * Transaction cache/memory pool.
  330. * It is big enough to contain not only transaction
  331. * itself, but additional crypto data (digest/hmac).
  332. */
  333. struct kmem_cache *trans_cache;
  334. mempool_t *trans_pool;
  335. /* This entity scans transaction tree */
  336. struct delayed_work trans_work;
  337. wait_queue_head_t wait;
  338. };
  339. /* Kernel representation of the security attribute */
  340. struct dst_secure
  341. {
  342. struct list_head sec_entry;
  343. struct dst_secure_user sec;
  344. };
  /* Block IO entry point and node network setup; defined outside this header. */
  int dst_process_bio(struct dst_node *n, struct bio *bio);
  int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
  int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);
  348. static inline struct dst_state *dst_state_get(struct dst_state *st)
  349. {
  350. BUG_ON(atomic_read(&st->refcnt) == 0);
  351. atomic_inc(&st->refcnt);
  352. return st;
  353. }
  /* Network state and socket helpers; defined outside this header. */
  void dst_state_put(struct dst_state *st);
  struct dst_state *dst_state_alloc(struct dst_node *n);
  int dst_state_socket_create(struct dst_state *st);
  void dst_state_socket_release(struct dst_state *st);
  void dst_state_exit_connected(struct dst_state *st);
  int dst_state_schedule_receiver(struct dst_state *st);
  void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);
  361. static inline void dst_state_lock(struct dst_state *st)
  362. {
  363. mutex_lock(&st->state_lock);
  364. }
  365. static inline void dst_state_unlock(struct dst_state *st)
  366. {
  367. mutex_unlock(&st->state_lock);
  368. }
  /* Setup and teardown of the socket polling machinery; defined outside this header. */
  void dst_poll_exit(struct dst_state *st);
  int dst_poll_init(struct dst_state *st);
  371. static inline unsigned int dst_state_poll(struct dst_state *st)
  372. {
  373. unsigned int revents = POLLHUP | POLLERR;
  374. dst_state_lock(st);
  375. if (st->socket)
  376. revents = st->socket->ops->poll(NULL, st->socket, NULL);
  377. dst_state_unlock(st);
  378. return revents;
  379. }
  /*
   * Setup callback which does nothing.
   * Its signature matches the 'setup' argument of thread_pool_schedule().
   *
   * @private: ignored.
   * @data:    ignored.
   *
   * Always returns 0.
   */
  static inline int dst_thread_setup(void *private, void *data)
  {
      return 0;
  }
  384. void dst_node_put(struct dst_node *n);
  385. static inline struct dst_node *dst_node_get(struct dst_node *n)
  386. {
  387. atomic_inc(&n->refcnt);
  388. return n;
  389. }
  390. int dst_data_recv(struct dst_state *st, void *data, unsigned int size);
  391. int dst_recv_cdata(struct dst_state *st, void *cdata);
  392. int dst_data_send_header(struct socket *sock,
  393. void *data, unsigned int size, int more);
  394. int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio);
  395. int dst_process_io(struct dst_state *st);
  396. int dst_export_crypto(struct dst_node *n, struct bio *bio);
  397. int dst_export_send_bio(struct bio *bio);
  398. int dst_start_export(struct dst_node *n);
  399. int __init dst_export_init(void);
  400. void dst_export_exit(void);
  401. /* Private structure for export block IO requests */
  402. struct dst_export_priv
  403. {
  404. struct list_head request_entry;
  405. struct dst_state *state;
  406. struct bio *bio;
  407. struct dst_cmd cmd;
  408. };
  409. static inline void dst_trans_get(struct dst_trans *t)
  410. {
  411. atomic_inc(&t->refcnt);
  412. }
  413. struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen);
  414. int dst_trans_remove(struct dst_trans *t);
  415. int dst_trans_remove_nolock(struct dst_trans *t);
  416. void dst_trans_put(struct dst_trans *t);
  417. /*
  418. * Convert bio into network command.
  419. */
  420. static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd,
  421. u32 command, u64 id)
  422. {
  423. cmd->cmd = command;
  424. cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS;
  425. cmd->rw = bio->bi_rw;
  426. cmd->size = bio->bi_size;
  427. cmd->csize = 0;
  428. cmd->id = id;
  429. cmd->sector = bio->bi_sector;
  430. };
  /* Transaction sending and crypto setup; defined outside this header. */
  int dst_trans_send(struct dst_trans *t);
  int dst_trans_crypto(struct dst_trans *t);
  int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
  void dst_node_crypto_exit(struct dst_node *n);
  435. static inline int dst_need_crypto(struct dst_node *n)
  436. {
  437. struct dst_crypto_ctl *c = &n->crypto;
  438. /*
  439. * Logical OR is appropriate here, but boolean one produces
  440. * more optimal code, so it is used instead.
  441. */
  442. return (c->hash_algo[0] | c->cipher_algo[0]);
  443. }
  /* Setup and teardown of per-node transaction machinery; defined outside this header. */
  int dst_node_trans_init(struct dst_node *n, unsigned int size);
  void dst_node_trans_exit(struct dst_node *n);
  446. /*
  447. * Pool of threads.
  448. * Ready list contains threads currently free to be used,
  449. * active one contains threads with some work scheduled for them.
  450. * Caller can wait in given queue when thread is ready.
  451. */
  452. struct thread_pool
  453. {
  454. int thread_num;
  455. struct mutex thread_lock;
  456. struct list_head ready_list, active_list;
  457. wait_queue_head_t wait;
  458. };
  /* Thread pool interface; defined outside this header. */
  void thread_pool_del_worker(struct thread_pool *p);
  void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
  int thread_pool_add_worker(struct thread_pool *p,
          char *name,
          unsigned int id,
          void *(*init)(void *data),
          void (*cleanup)(void *data),
          void *data);

  void thread_pool_destroy(struct thread_pool *p);
  struct thread_pool *thread_pool_create(int num, char *name,
          void *(*init)(void *data),
          void (*cleanup)(void *data),
          void *data);

  int thread_pool_schedule(struct thread_pool *p,
          int (*setup)(void *stored_private, void *setup_data),
          int (*action)(void *stored_private, void *setup_data),
          void *setup_data, long timeout);
  int thread_pool_schedule_private(struct thread_pool *p,
          int (*setup)(void *private, void *data),
          int (*action)(void *private, void *data),
          void *data, long timeout, void *id);
  480. #endif /* __KERNEL__ */
  481. #endif /* __DST_H */