volume.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. /* AFS volume management
  2. *
  3. * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/module.h>
  13. #include <linux/init.h>
  14. #include <linux/slab.h>
  15. #include <linux/fs.h>
  16. #include <linux/pagemap.h>
  17. #include <linux/sched.h>
  18. #include "internal.h"
/* printable names for the AFSVL_* volume type indices (used in log output) */
static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
  20. /*
  21. * lookup a volume by name
  22. * - this can be one of the following:
  23. * "%[cell:]volume[.]" R/W volume
  24. * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
  25. * or R/W (rwparent=1) volume
  26. * "%[cell:]volume.readonly" R/O volume
  27. * "#[cell:]volume.readonly" R/O volume
  28. * "%[cell:]volume.backup" Backup volume
  29. * "#[cell:]volume.backup" Backup volume
  30. *
  31. * The cell name is optional, and defaults to the current cell.
  32. *
  33. * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
  34. * Guide
  35. * - Rule 1: Explicit type suffix forces access of that type or nothing
  36. * (no suffix, then use Rule 2 & 3)
  37. * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
  38. * if not available
  39. * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  40. * explicitly told otherwise
  41. */
  42. struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
  43. {
  44. struct afs_vlocation *vlocation = NULL;
  45. struct afs_volume *volume = NULL;
  46. struct afs_server *server = NULL;
  47. char srvtmask;
  48. int ret, loop;
  49. _enter("{%*.*s,%d}",
  50. params->volnamesz, params->volnamesz, params->volname, params->rwpath);
  51. /* lookup the volume location record */
  52. vlocation = afs_vlocation_lookup(params->cell, params->key,
  53. params->volname, params->volnamesz);
  54. if (IS_ERR(vlocation)) {
  55. ret = PTR_ERR(vlocation);
  56. vlocation = NULL;
  57. goto error;
  58. }
  59. /* make the final decision on the type we want */
  60. ret = -ENOMEDIUM;
  61. if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
  62. goto error;
  63. srvtmask = 0;
  64. for (loop = 0; loop < vlocation->vldb.nservers; loop++)
  65. srvtmask |= vlocation->vldb.srvtmask[loop];
  66. if (params->force) {
  67. if (!(srvtmask & (1 << params->type)))
  68. goto error;
  69. } else if (srvtmask & AFS_VOL_VTM_RO) {
  70. params->type = AFSVL_ROVOL;
  71. } else if (srvtmask & AFS_VOL_VTM_RW) {
  72. params->type = AFSVL_RWVOL;
  73. } else {
  74. goto error;
  75. }
  76. down_write(&params->cell->vl_sem);
  77. /* is the volume already active? */
  78. if (vlocation->vols[params->type]) {
  79. /* yes - re-use it */
  80. volume = vlocation->vols[params->type];
  81. afs_get_volume(volume);
  82. goto success;
  83. }
  84. /* create a new volume record */
  85. _debug("creating new volume record");
  86. ret = -ENOMEM;
  87. volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
  88. if (!volume)
  89. goto error_up;
  90. atomic_set(&volume->usage, 1);
  91. volume->type = params->type;
  92. volume->type_force = params->force;
  93. volume->cell = params->cell;
  94. volume->vid = vlocation->vldb.vid[params->type];
  95. init_rwsem(&volume->server_sem);
  96. /* look up all the applicable server records */
  97. for (loop = 0; loop < 8; loop++) {
  98. if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
  99. server = afs_lookup_server(
  100. volume->cell, &vlocation->vldb.servers[loop]);
  101. if (IS_ERR(server)) {
  102. ret = PTR_ERR(server);
  103. goto error_discard;
  104. }
  105. volume->servers[volume->nservers] = server;
  106. volume->nservers++;
  107. }
  108. }
  109. /* attach the cache and volume location */
  110. #ifdef CONFIG_AFS_FSCACHE
  111. volume->cache = fscache_acquire_cookie(vlocation->cache,
  112. &afs_volume_cache_index_def,
  113. volume);
  114. #endif
  115. afs_get_vlocation(vlocation);
  116. volume->vlocation = vlocation;
  117. vlocation->vols[volume->type] = volume;
  118. success:
  119. _debug("kAFS selected %s volume %08x",
  120. afs_voltypes[volume->type], volume->vid);
  121. up_write(&params->cell->vl_sem);
  122. afs_put_vlocation(vlocation);
  123. _leave(" = %p", volume);
  124. return volume;
  125. /* clean up */
  126. error_up:
  127. up_write(&params->cell->vl_sem);
  128. error:
  129. afs_put_vlocation(vlocation);
  130. _leave(" = %d", ret);
  131. return ERR_PTR(ret);
  132. error_discard:
  133. up_write(&params->cell->vl_sem);
  134. for (loop = volume->nservers - 1; loop >= 0; loop--)
  135. afs_put_server(volume->servers[loop]);
  136. kfree(volume);
  137. goto error;
  138. }
  139. /*
  140. * destroy a volume record
  141. */
  142. void afs_put_volume(struct afs_volume *volume)
  143. {
  144. struct afs_vlocation *vlocation;
  145. int loop;
  146. if (!volume)
  147. return;
  148. _enter("%p", volume);
  149. ASSERTCMP(atomic_read(&volume->usage), >, 0);
  150. vlocation = volume->vlocation;
  151. /* to prevent a race, the decrement and the dequeue must be effectively
  152. * atomic */
  153. down_write(&vlocation->cell->vl_sem);
  154. if (likely(!atomic_dec_and_test(&volume->usage))) {
  155. up_write(&vlocation->cell->vl_sem);
  156. _leave("");
  157. return;
  158. }
  159. vlocation->vols[volume->type] = NULL;
  160. up_write(&vlocation->cell->vl_sem);
  161. /* finish cleaning up the volume */
  162. #ifdef CONFIG_AFS_FSCACHE
  163. fscache_relinquish_cookie(volume->cache, 0);
  164. #endif
  165. afs_put_vlocation(vlocation);
  166. for (loop = volume->nservers - 1; loop >= 0; loop--)
  167. afs_put_server(volume->servers[loop]);
  168. kfree(volume);
  169. _leave(" [destroyed]");
  170. }
  171. /*
  172. * pick a server to use to try accessing this volume
  173. * - returns with an elevated usage count on the server chosen
  174. */
  175. struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
  176. {
  177. struct afs_volume *volume = vnode->volume;
  178. struct afs_server *server;
  179. int ret, state, loop;
  180. _enter("%s", volume->vlocation->vldb.name);
  181. /* stick with the server we're already using if we can */
  182. if (vnode->server && vnode->server->fs_state == 0) {
  183. afs_get_server(vnode->server);
  184. _leave(" = %p [current]", vnode->server);
  185. return vnode->server;
  186. }
  187. down_read(&volume->server_sem);
  188. /* handle the no-server case */
  189. if (volume->nservers == 0) {
  190. ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
  191. up_read(&volume->server_sem);
  192. _leave(" = %d [no servers]", ret);
  193. return ERR_PTR(ret);
  194. }
  195. /* basically, just search the list for the first live server and use
  196. * that */
  197. ret = 0;
  198. for (loop = 0; loop < volume->nservers; loop++) {
  199. server = volume->servers[loop];
  200. state = server->fs_state;
  201. _debug("consider %d [%d]", loop, state);
  202. switch (state) {
  203. /* found an apparently healthy server */
  204. case 0:
  205. afs_get_server(server);
  206. up_read(&volume->server_sem);
  207. _leave(" = %p (picked %08x)",
  208. server, ntohl(server->addr.s_addr));
  209. return server;
  210. case -ENETUNREACH:
  211. if (ret == 0)
  212. ret = state;
  213. break;
  214. case -EHOSTUNREACH:
  215. if (ret == 0 ||
  216. ret == -ENETUNREACH)
  217. ret = state;
  218. break;
  219. case -ECONNREFUSED:
  220. if (ret == 0 ||
  221. ret == -ENETUNREACH ||
  222. ret == -EHOSTUNREACH)
  223. ret = state;
  224. break;
  225. default:
  226. case -EREMOTEIO:
  227. if (ret == 0 ||
  228. ret == -ENETUNREACH ||
  229. ret == -EHOSTUNREACH ||
  230. ret == -ECONNREFUSED)
  231. ret = state;
  232. break;
  233. }
  234. }
  235. /* no available servers
  236. * - TODO: handle the no active servers case better
  237. */
  238. up_read(&volume->server_sem);
  239. _leave(" = %d", ret);
  240. return ERR_PTR(ret);
  241. }
/*
 * release a server after use
 * - releases the ref on the server struct that was acquired by picking
 * - records result of using a particular server to access a volume
 * - return 0 to try again, 1 if okay or to issue error
 * - the caller must release the server struct if result was 0
 */
int afs_volume_release_fileserver(struct afs_vnode *vnode,
				  struct afs_server *server,
				  int result)
{
	struct afs_volume *volume = vnode->volume;
	unsigned loop;

	_enter("%s,%08x,%d",
	       volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
	       result);

	switch (result) {
		/* success */
	case 0:
		server->fs_act_jif = jiffies;
		server->fs_state = 0;
		_leave("");
		return 1;

		/* the fileserver denied all knowledge of the volume */
	case -ENOMEDIUM:
		server->fs_act_jif = jiffies;
		down_write(&volume->server_sem);

		/* firstly, find where the server is in the active list (if it
		 * is) */
		for (loop = 0; loop < volume->nservers; loop++)
			if (volume->servers[loop] == server)
				goto present;

		/* no longer there - may have been discarded by another op */
		goto try_next_server_upw;

	present:
		/* remove the rejecting server from the volume's list; this
		 * afs_put_server() drops the list's reference - the caller's
		 * pick reference is dropped separately below */
		volume->nservers--;
		memmove(&volume->servers[loop],
			&volume->servers[loop + 1],
			sizeof(volume->servers[loop]) *
			(volume->nservers - loop));
		volume->servers[volume->nservers] = NULL;
		afs_put_server(server);
		volume->rjservers++;

		if (volume->nservers > 0)
			/* another server might acknowledge its existence */
			goto try_next_server_upw;

		/* handle the case where all the fileservers have rejected the
		 * volume
		 * - TODO: try asking the fileservers for volume information
		 * - TODO: contact the VL server again to see if the volume is
		 *   no longer registered
		 */
		up_write(&volume->server_sem);
		afs_put_server(server);
		_leave(" [completely rejected]");
		return 1;

		/* problem reaching the server */
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
	case -ETIME:
	case -ETIMEDOUT:
	case -EREMOTEIO:
		/* mark the server as dead
		 * TODO: vary dead timeout depending on error
		 */
		spin_lock(&server->fs_lock);
		if (!server->fs_state) {
			/* leave the server marked dead for ten seconds */
			server->fs_dead_jif = jiffies + HZ * 10;
			server->fs_state = result;
			printk("kAFS: SERVER DEAD state=%d\n", result);
		}
		spin_unlock(&server->fs_lock);
		goto try_next_server;

		/* miscellaneous error */
	default:
		server->fs_act_jif = jiffies;
		/* fall through - same handling as -ENOMEM/-ENONET, but only
		 * the default case stamps the activity time */
	case -ENOMEM:
	case -ENONET:
		/* tell the caller to accept the result */
		afs_put_server(server);
		_leave(" [local failure]");
		return 1;
	}

	/* tell the caller to loop around and try the next server */
try_next_server_upw:
	up_write(&volume->server_sem);
try_next_server:
	afs_put_server(server);
	_leave(" [try next server]");
	return 0;
}