nfs4filelayoutdev.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. /*
  2. * Device operations for the pnfs nfs4 file layout driver.
  3. *
  4. * Copyright (c) 2002
  5. * The Regents of the University of Michigan
  6. * All Rights Reserved
  7. *
  8. * Dean Hildebrand <dhildebz@umich.edu>
  9. * Garth Goodson <Garth.Goodson@netapp.com>
  10. *
  11. * Permission is granted to use, copy, create derivative works, and
  12. * redistribute this software and such derivative works for any purpose,
  13. * so long as the name of the University of Michigan is not used in
  14. * any advertising or publicity pertaining to the use or distribution
  15. * of this software without specific, written prior authorization. If
  16. * the above copyright notice or any other identification of the
  17. * University of Michigan is included in any copy of any portion of
  18. * this software, then the disclaimer below must also be included.
  19. *
  20. * This software is provided as is, without representation or warranty
  21. * of any kind either express or implied, including without limitation
  22. * the implied warranties of merchantability, fitness for a particular
  23. * purpose, or noninfringement. The Regents of the University of
  24. * Michigan shall not be liable for any damages, including special,
  25. * indirect, incidental, or consequential damages, with respect to any
  26. * claim arising out of or in connection with the use of the software,
  27. * even if it has been or is hereafter advised of the possibility of
  28. * such damages.
  29. */
  30. #include <linux/nfs_fs.h>
  31. #include <linux/vmalloc.h>
  32. #include "internal.h"
  33. #include "nfs4filelayout.h"
  34. #define NFSDBG_FACILITY NFSDBG_PNFS_LD
  35. /*
  36. * Data server cache
  37. *
  38. * Data servers can be mapped to different device ids.
  39. * nfs4_pnfs_ds reference counting
  40. * - set to 1 on allocation
  41. * - incremented when a device id maps a data server already in the cache.
  42. * - decremented when deviceid is removed from the cache.
  43. */
/*
 * Serializes access to nfs4_data_server_cache. Taken directly around
 * lookups/insertions and via atomic_dec_and_lock() when an entry is removed.
 */
DEFINE_SPINLOCK(nfs4_ds_cache_lock);
/* List of cached nfs4_pnfs_ds entries, linked through their ds_node member. */
static LIST_HEAD(nfs4_data_server_cache);
  46. /* Debug routines */
  47. void
  48. print_ds(struct nfs4_pnfs_ds *ds)
  49. {
  50. if (ds == NULL) {
  51. printk("%s NULL device\n", __func__);
  52. return;
  53. }
  54. printk(" ip_addr %x port %hu\n"
  55. " ref count %d\n"
  56. " client %p\n"
  57. " cl_exchange_flags %x\n",
  58. ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
  59. atomic_read(&ds->ds_count), ds->ds_clp,
  60. ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
  61. }
  62. void
  63. print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
  64. {
  65. int i;
  66. ifdebug(FACILITY) {
  67. printk("%s dsaddr->ds_num %d\n", __func__,
  68. dsaddr->ds_num);
  69. for (i = 0; i < dsaddr->ds_num; i++)
  70. print_ds(dsaddr->ds_list[i]);
  71. }
  72. }
  73. void print_deviceid(struct nfs4_deviceid *id)
  74. {
  75. u32 *p = (u32 *)id;
  76. dprintk("%s: device id= [%x%x%x%x]\n", __func__,
  77. p[0], p[1], p[2], p[3]);
  78. }
  79. /* nfs4_ds_cache_lock is held */
  80. static struct nfs4_pnfs_ds *
  81. _data_server_lookup_locked(u32 ip_addr, u32 port)
  82. {
  83. struct nfs4_pnfs_ds *ds;
  84. dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
  85. ntohl(ip_addr), ntohs(port));
  86. list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
  87. if (ds->ds_ip_addr == ip_addr &&
  88. ds->ds_port == port) {
  89. return ds;
  90. }
  91. }
  92. return NULL;
  93. }
  94. /*
  95. * Create an rpc connection to the nfs4_pnfs_ds data server
  96. * Currently only support IPv4
  97. */
  98. static int
  99. nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
  100. {
  101. struct nfs_client *clp;
  102. struct sockaddr_in sin;
  103. int status = 0;
  104. dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
  105. ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
  106. mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
  107. sin.sin_family = AF_INET;
  108. sin.sin_addr.s_addr = ds->ds_ip_addr;
  109. sin.sin_port = ds->ds_port;
  110. clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
  111. sizeof(sin), IPPROTO_TCP);
  112. if (IS_ERR(clp)) {
  113. status = PTR_ERR(clp);
  114. goto out;
  115. }
  116. if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
  117. if (!is_ds_client(clp)) {
  118. status = -ENODEV;
  119. goto out_put;
  120. }
  121. ds->ds_clp = clp;
  122. dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
  123. ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
  124. goto out;
  125. }
  126. /*
  127. * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
  128. * be equal to the MDS lease. Renewal is scheduled in create_session.
  129. */
  130. spin_lock(&mds_srv->nfs_client->cl_lock);
  131. clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
  132. spin_unlock(&mds_srv->nfs_client->cl_lock);
  133. clp->cl_last_renewal = jiffies;
  134. /* New nfs_client */
  135. status = nfs4_init_ds_session(clp);
  136. if (status)
  137. goto out_put;
  138. ds->ds_clp = clp;
  139. dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
  140. ntohs(ds->ds_port));
  141. out:
  142. return status;
  143. out_put:
  144. nfs_put_client(clp);
  145. goto out;
  146. }
  147. static void
  148. destroy_ds(struct nfs4_pnfs_ds *ds)
  149. {
  150. dprintk("--> %s\n", __func__);
  151. ifdebug(FACILITY)
  152. print_ds(ds);
  153. if (ds->ds_clp)
  154. nfs_put_client(ds->ds_clp);
  155. kfree(ds);
  156. }
  157. static void
  158. nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
  159. {
  160. struct nfs4_pnfs_ds *ds;
  161. int i;
  162. print_deviceid(&dsaddr->deviceid.de_id);
  163. for (i = 0; i < dsaddr->ds_num; i++) {
  164. ds = dsaddr->ds_list[i];
  165. if (ds != NULL) {
  166. if (atomic_dec_and_lock(&ds->ds_count,
  167. &nfs4_ds_cache_lock)) {
  168. list_del_init(&ds->ds_node);
  169. spin_unlock(&nfs4_ds_cache_lock);
  170. destroy_ds(ds);
  171. }
  172. }
  173. }
  174. kfree(dsaddr->stripe_indices);
  175. kfree(dsaddr);
  176. }
  177. void
  178. nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
  179. {
  180. struct nfs4_file_layout_dsaddr *dsaddr =
  181. container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
  182. nfs4_fl_free_deviceid(dsaddr);
  183. }
  184. static struct nfs4_pnfs_ds *
  185. nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
  186. {
  187. struct nfs4_pnfs_ds *tmp_ds, *ds;
  188. ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
  189. if (!ds)
  190. goto out;
  191. spin_lock(&nfs4_ds_cache_lock);
  192. tmp_ds = _data_server_lookup_locked(ip_addr, port);
  193. if (tmp_ds == NULL) {
  194. ds->ds_ip_addr = ip_addr;
  195. ds->ds_port = port;
  196. atomic_set(&ds->ds_count, 1);
  197. INIT_LIST_HEAD(&ds->ds_node);
  198. ds->ds_clp = NULL;
  199. list_add(&ds->ds_node, &nfs4_data_server_cache);
  200. dprintk("%s add new data server ip 0x%x\n", __func__,
  201. ds->ds_ip_addr);
  202. } else {
  203. kfree(ds);
  204. atomic_inc(&tmp_ds->ds_count);
  205. dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
  206. __func__, tmp_ds->ds_ip_addr,
  207. atomic_read(&tmp_ds->ds_count));
  208. ds = tmp_ds;
  209. }
  210. spin_unlock(&nfs4_ds_cache_lock);
  211. out:
  212. return ds;
  213. }
  214. /*
  215. * Currently only support ipv4, and one multi-path address.
  216. */
  217. static struct nfs4_pnfs_ds *
  218. decode_and_add_ds(__be32 **pp, struct inode *inode)
  219. {
  220. struct nfs4_pnfs_ds *ds = NULL;
  221. char *buf;
  222. const char *ipend, *pstr;
  223. u32 ip_addr, port;
  224. int nlen, rlen, i;
  225. int tmp[2];
  226. __be32 *r_netid, *r_addr, *p = *pp;
  227. /* r_netid */
  228. nlen = be32_to_cpup(p++);
  229. r_netid = p;
  230. p += XDR_QUADLEN(nlen);
  231. /* r_addr */
  232. rlen = be32_to_cpup(p++);
  233. r_addr = p;
  234. p += XDR_QUADLEN(rlen);
  235. *pp = p;
  236. /* Check that netid is "tcp" */
  237. if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) {
  238. dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
  239. goto out_err;
  240. }
  241. /* ipv6 length plus port is legal */
  242. if (rlen > INET6_ADDRSTRLEN + 8) {
  243. dprintk("%s: Invalid address, length %d\n", __func__,
  244. rlen);
  245. goto out_err;
  246. }
  247. buf = kmalloc(rlen + 1, GFP_KERNEL);
  248. if (!buf) {
  249. dprintk("%s: Not enough memory\n", __func__);
  250. goto out_err;
  251. }
  252. buf[rlen] = '\0';
  253. memcpy(buf, r_addr, rlen);
  254. /* replace the port dots with dashes for the in4_pton() delimiter*/
  255. for (i = 0; i < 2; i++) {
  256. char *res = strrchr(buf, '.');
  257. if (!res) {
  258. dprintk("%s: Failed finding expected dots in port\n",
  259. __func__);
  260. goto out_free;
  261. }
  262. *res = '-';
  263. }
  264. /* Currently only support ipv4 address */
  265. if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
  266. dprintk("%s: Only ipv4 addresses supported\n", __func__);
  267. goto out_free;
  268. }
  269. /* port */
  270. pstr = ipend;
  271. sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
  272. port = htons((tmp[0] << 8) | (tmp[1]));
  273. ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
  274. dprintk("%s: Decoded address and port %s\n", __func__, buf);
  275. out_free:
  276. kfree(buf);
  277. out_err:
  278. return ds;
  279. }
  280. /* Decode opaque device data and return the result */
  281. static struct nfs4_file_layout_dsaddr*
  282. decode_device(struct inode *ino, struct pnfs_device *pdev)
  283. {
  284. int i, dummy;
  285. u32 cnt, num;
  286. u8 *indexp;
  287. __be32 *p = (__be32 *)pdev->area, *indicesp;
  288. struct nfs4_file_layout_dsaddr *dsaddr;
  289. /* Get the stripe count (number of stripe index) */
  290. cnt = be32_to_cpup(p++);
  291. dprintk("%s stripe count %d\n", __func__, cnt);
  292. if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
  293. printk(KERN_WARNING "%s: stripe count %d greater than "
  294. "supported maximum %d\n", __func__,
  295. cnt, NFS4_PNFS_MAX_STRIPE_CNT);
  296. goto out_err;
  297. }
  298. /* Check the multipath list count */
  299. indicesp = p;
  300. p += XDR_QUADLEN(cnt << 2);
  301. num = be32_to_cpup(p++);
  302. dprintk("%s ds_num %u\n", __func__, num);
  303. if (num > NFS4_PNFS_MAX_MULTI_CNT) {
  304. printk(KERN_WARNING "%s: multipath count %d greater than "
  305. "supported maximum %d\n", __func__,
  306. num, NFS4_PNFS_MAX_MULTI_CNT);
  307. goto out_err;
  308. }
  309. dsaddr = kzalloc(sizeof(*dsaddr) +
  310. (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
  311. GFP_KERNEL);
  312. if (!dsaddr)
  313. goto out_err;
  314. dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
  315. if (!dsaddr->stripe_indices)
  316. goto out_err_free;
  317. dsaddr->stripe_count = cnt;
  318. dsaddr->ds_num = num;
  319. memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
  320. /* Go back an read stripe indices */
  321. p = indicesp;
  322. indexp = &dsaddr->stripe_indices[0];
  323. for (i = 0; i < dsaddr->stripe_count; i++) {
  324. *indexp = be32_to_cpup(p++);
  325. if (*indexp >= num)
  326. goto out_err_free;
  327. indexp++;
  328. }
  329. /* Skip already read multipath list count */
  330. p++;
  331. for (i = 0; i < dsaddr->ds_num; i++) {
  332. int j;
  333. dummy = be32_to_cpup(p++); /* multipath count */
  334. if (dummy > 1) {
  335. printk(KERN_WARNING
  336. "%s: Multipath count %d not supported, "
  337. "skipping all greater than 1\n", __func__,
  338. dummy);
  339. }
  340. for (j = 0; j < dummy; j++) {
  341. if (j == 0) {
  342. dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
  343. if (dsaddr->ds_list[i] == NULL)
  344. goto out_err_free;
  345. } else {
  346. u32 len;
  347. /* skip extra multipath */
  348. len = be32_to_cpup(p++);
  349. p += XDR_QUADLEN(len);
  350. len = be32_to_cpup(p++);
  351. p += XDR_QUADLEN(len);
  352. continue;
  353. }
  354. }
  355. }
  356. return dsaddr;
  357. out_err_free:
  358. nfs4_fl_free_deviceid(dsaddr);
  359. out_err:
  360. dprintk("%s ERROR: returning NULL\n", __func__);
  361. return NULL;
  362. }
  363. /*
  364. * Decode the opaque device specified in 'dev'
  365. * and add it to the list of available devices.
  366. * If the deviceid is already cached, nfs4_add_deviceid will return
  367. * a pointer to the cached struct and throw away the new.
  368. */
  369. static struct nfs4_file_layout_dsaddr*
  370. decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
  371. {
  372. struct nfs4_file_layout_dsaddr *dsaddr;
  373. struct pnfs_deviceid_node *d;
  374. dsaddr = decode_device(inode, dev);
  375. if (!dsaddr) {
  376. printk(KERN_WARNING "%s: Could not decode or add device\n",
  377. __func__);
  378. return NULL;
  379. }
  380. d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
  381. &dsaddr->deviceid);
  382. return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
  383. }
  384. /*
  385. * Retrieve the information for dev_id, add it to the list
  386. * of available devices, and return it.
  387. */
  388. struct nfs4_file_layout_dsaddr *
  389. get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
  390. {
  391. struct pnfs_device *pdev = NULL;
  392. u32 max_resp_sz;
  393. int max_pages;
  394. struct page **pages = NULL;
  395. struct nfs4_file_layout_dsaddr *dsaddr = NULL;
  396. int rc, i;
  397. struct nfs_server *server = NFS_SERVER(inode);
  398. /*
  399. * Use the session max response size as the basis for setting
  400. * GETDEVICEINFO's maxcount
  401. */
  402. max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
  403. max_pages = max_resp_sz >> PAGE_SHIFT;
  404. dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
  405. __func__, inode, max_resp_sz, max_pages);
  406. pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
  407. if (pdev == NULL)
  408. return NULL;
  409. pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
  410. if (pages == NULL) {
  411. kfree(pdev);
  412. return NULL;
  413. }
  414. for (i = 0; i < max_pages; i++) {
  415. pages[i] = alloc_page(GFP_KERNEL);
  416. if (!pages[i])
  417. goto out_free;
  418. }
  419. /* set pdev->area */
  420. pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
  421. if (!pdev->area)
  422. goto out_free;
  423. memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
  424. pdev->layout_type = LAYOUT_NFSV4_1_FILES;
  425. pdev->pages = pages;
  426. pdev->pgbase = 0;
  427. pdev->pglen = PAGE_SIZE * max_pages;
  428. pdev->mincount = 0;
  429. rc = nfs4_proc_getdeviceinfo(server, pdev);
  430. dprintk("%s getdevice info returns %d\n", __func__, rc);
  431. if (rc)
  432. goto out_free;
  433. /*
  434. * Found new device, need to decode it and then add it to the
  435. * list of known devices for this mountpoint.
  436. */
  437. dsaddr = decode_and_add_device(inode, pdev);
  438. out_free:
  439. if (pdev->area != NULL)
  440. vunmap(pdev->area);
  441. for (i = 0; i < max_pages; i++)
  442. __free_page(pages[i]);
  443. kfree(pages);
  444. kfree(pdev);
  445. dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
  446. return dsaddr;
  447. }
  448. struct nfs4_file_layout_dsaddr *
  449. nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
  450. {
  451. struct pnfs_deviceid_node *d;
  452. d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
  453. return (d == NULL) ? NULL :
  454. container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
  455. }