/* volume.c */
/* AFS volume management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
  11. #include <linux/kernel.h>
  12. #include <linux/module.h>
  13. #include <linux/init.h>
  14. #include <linux/slab.h>
  15. #include <linux/fs.h>
  16. #include <linux/pagemap.h>
  17. #include <linux/sched.h>
  18. #include "internal.h"
  19. static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
/*
 * lookup a volume by name
 * - this can be one of the following:
 *	"%[cell:]volume[.]"		R/W volume
 *	"#[cell:]volume[.]"		R/O or R/W volume (rwparent=0),
 *					 or R/W (rwparent=1) volume
 *	"%[cell:]volume.readonly"	R/O volume
 *	"#[cell:]volume.readonly"	R/O volume
 *	"%[cell:]volume.backup"		Backup volume
 *	"#[cell:]volume.backup"		Backup volume
 *
 * The cell name is optional, and defaults to the current cell.
 *
 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
 * Guide
 * - Rule 1: Explicit type suffix forces access of that type or nothing
 *           (no suffix, then use Rule 2 & 3)
 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
 *           if not available
 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
 *           explicitly told otherwise
 *
 * Returns a volume record with an elevated usage count on success, or an
 * ERR_PTR-encoded error.  An existing active volume of the wanted type is
 * reused; otherwise a new record is built from the VLDB entry.
 */
struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
{
	struct afs_vlocation *vlocation = NULL;
	struct afs_volume *volume = NULL;
	struct afs_server *server = NULL;
	char srvtmask;
	int ret, loop;

	_enter("{%*.*s,%d}",
	       params->volnamesz, params->volnamesz, params->volname, params->rwpath);

	/* lookup the volume location record */
	vlocation = afs_vlocation_lookup(params->cell, params->key,
					 params->volname, params->volnamesz);
	if (IS_ERR(vlocation)) {
		ret = PTR_ERR(vlocation);
		/* clear the pointer so the shared error path's put doesn't
		 * see the error value - NOTE(review): assumes
		 * afs_put_vlocation(NULL) is a no-op; confirm */
		vlocation = NULL;
		goto error;
	}

	/* make the final decision on the type we want */
	ret = -ENOMEDIUM;
	if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
		goto error;

	/* merge the per-server volume-type masks from the VLDB entry */
	srvtmask = 0;
	for (loop = 0; loop < vlocation->vldb.nservers; loop++)
		srvtmask |= vlocation->vldb.srvtmask[loop];

	if (params->force) {
		/* Rule 1: explicit type suffix - that type or nothing */
		if (!(srvtmask & (1 << params->type)))
			goto error;
	} else if (srvtmask & AFS_VOL_VTM_RO) {
		/* prefer a read-only volume when one is offered */
		params->type = AFSVL_ROVOL;
	} else if (srvtmask & AFS_VOL_VTM_RW) {
		params->type = AFSVL_RWVOL;
	} else {
		goto error;
	}

	/* serialise with other lookups and with afs_put_volume() on this
	 * cell */
	down_write(&params->cell->vl_sem);

	/* is the volume already active? */
	if (vlocation->vols[params->type]) {
		/* yes - re-use it */
		volume = vlocation->vols[params->type];
		afs_get_volume(volume);
		goto success;
	}

	/* create a new volume record */
	_debug("creating new volume record");

	ret = -ENOMEM;
	volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
	if (!volume)
		goto error_up;

	atomic_set(&volume->usage, 1);
	volume->type = params->type;
	volume->type_force = params->force;
	volume->cell = params->cell;
	volume->vid = vlocation->vldb.vid[params->type];

	init_rwsem(&volume->server_sem);

	/* look up all the applicable server records
	 * - NOTE(review): the bound of 8 presumably matches the size of the
	 *   VLDB srvtmask/servers arrays; confirm against the declaration */
	for (loop = 0; loop < 8; loop++) {
		if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
			server = afs_lookup_server(
				volume->cell, &vlocation->vldb.servers[loop]);
			if (IS_ERR(server)) {
				ret = PTR_ERR(server);
				goto error_discard;
			}

			volume->servers[volume->nservers] = server;
			volume->nservers++;
		}
	}

	/* attach the cache and volume location */
#ifdef AFS_CACHING_SUPPORT
	cachefs_acquire_cookie(vlocation->cache,
			       &afs_vnode_cache_index_def,
			       volume,
			       &volume->cache);
#endif

	afs_get_vlocation(vlocation);
	volume->vlocation = vlocation;
	vlocation->vols[volume->type] = volume;

success:
	_debug("kAFS selected %s volume %08x",
	       afs_voltypes[volume->type], volume->vid);
	up_write(&params->cell->vl_sem);
	afs_put_vlocation(vlocation);
	_leave(" = %p", volume);
	return volume;

	/* clean up */
error_up:
	up_write(&params->cell->vl_sem);
error:
	afs_put_vlocation(vlocation);
	_leave(" = %d", ret);
	return ERR_PTR(ret);

error_discard:
	up_write(&params->cell->vl_sem);

	/* drop the refs taken on the servers gathered so far, then discard
	 * the half-built volume record */
	for (loop = volume->nservers - 1; loop >= 0; loop--)
		afs_put_server(volume->servers[loop]);

	kfree(volume);
	goto error;
}
/*
 * destroy a volume record
 * - drops a reference on @volume; on the last put, unhooks the record from
 *   its vlocation and frees it along with its server refs
 * - NULL is tolerated as a no-op
 */
void afs_put_volume(struct afs_volume *volume)
{
	struct afs_vlocation *vlocation;
	int loop;

	if (!volume)
		return;

	_enter("%p", volume);

	ASSERTCMP(atomic_read(&volume->usage), >, 0);

	vlocation = volume->vlocation;

	/* to prevent a race, the decrement and the dequeue must be effectively
	 * atomic */
	down_write(&vlocation->cell->vl_sem);

	if (likely(!atomic_dec_and_test(&volume->usage))) {
		/* not the last ref - nothing more to do */
		up_write(&vlocation->cell->vl_sem);
		_leave("");
		return;
	}

	/* last ref gone: unhook from the vlocation while still holding vl_sem
	 * so afs_volume_lookup() can't find and reuse a dying record */
	vlocation->vols[volume->type] = NULL;

	up_write(&vlocation->cell->vl_sem);

	/* finish cleaning up the volume */
#ifdef AFS_CACHING_SUPPORT
	cachefs_relinquish_cookie(volume->cache, 0);
#endif

	afs_put_vlocation(vlocation);

	/* release the refs held on the fileserver records */
	for (loop = volume->nservers - 1; loop >= 0; loop--)
		afs_put_server(volume->servers[loop]);

	kfree(volume);
	_leave(" [destroyed]");
}
  172. /*
  173. * pick a server to use to try accessing this volume
  174. * - returns with an elevated usage count on the server chosen
  175. */
  176. struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
  177. {
  178. struct afs_volume *volume = vnode->volume;
  179. struct afs_server *server;
  180. int ret, state, loop;
  181. _enter("%s", volume->vlocation->vldb.name);
  182. /* stick with the server we're already using if we can */
  183. if (vnode->server && vnode->server->fs_state == 0) {
  184. afs_get_server(vnode->server);
  185. _leave(" = %p [current]", vnode->server);
  186. return vnode->server;
  187. }
  188. down_read(&volume->server_sem);
  189. /* handle the no-server case */
  190. if (volume->nservers == 0) {
  191. ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
  192. up_read(&volume->server_sem);
  193. _leave(" = %d [no servers]", ret);
  194. return ERR_PTR(ret);
  195. }
  196. /* basically, just search the list for the first live server and use
  197. * that */
  198. ret = 0;
  199. for (loop = 0; loop < volume->nservers; loop++) {
  200. server = volume->servers[loop];
  201. state = server->fs_state;
  202. _debug("consider %d [%d]", loop, state);
  203. switch (state) {
  204. /* found an apparently healthy server */
  205. case 0:
  206. afs_get_server(server);
  207. up_read(&volume->server_sem);
  208. _leave(" = %p (picked %08x)",
  209. server, ntohl(server->addr.s_addr));
  210. return server;
  211. case -ENETUNREACH:
  212. if (ret == 0)
  213. ret = state;
  214. break;
  215. case -EHOSTUNREACH:
  216. if (ret == 0 ||
  217. ret == -ENETUNREACH)
  218. ret = state;
  219. break;
  220. case -ECONNREFUSED:
  221. if (ret == 0 ||
  222. ret == -ENETUNREACH ||
  223. ret == -EHOSTUNREACH)
  224. ret = state;
  225. break;
  226. default:
  227. case -EREMOTEIO:
  228. if (ret == 0 ||
  229. ret == -ENETUNREACH ||
  230. ret == -EHOSTUNREACH ||
  231. ret == -ECONNREFUSED)
  232. ret = state;
  233. break;
  234. }
  235. }
  236. /* no available servers
  237. * - TODO: handle the no active servers case better
  238. */
  239. up_read(&volume->server_sem);
  240. _leave(" = %d", ret);
  241. return ERR_PTR(ret);
  242. }
/*
 * release a server after use
 * - releases the ref on the server struct that was acquired by picking
 * - records result of using a particular server to access a volume
 * - return 0 to try again, 1 if okay or to issue error
 * - the caller must release the server struct if result was 0
 */
int afs_volume_release_fileserver(struct afs_vnode *vnode,
				  struct afs_server *server,
				  int result)
{
	struct afs_volume *volume = vnode->volume;
	unsigned loop;

	_enter("%s,%08x,%d",
	       volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
	       result);

	switch (result) {
		/* success */
	case 0:
		server->fs_act_jif = jiffies;
		server->fs_state = 0;
		_leave("");
		return 1;

		/* the fileserver denied all knowledge of the volume */
	case -ENOMEDIUM:
		server->fs_act_jif = jiffies;
		down_write(&volume->server_sem);

		/* firstly, find where the server is in the active list (if it
		 * is) */
		for (loop = 0; loop < volume->nservers; loop++)
			if (volume->servers[loop] == server)
				goto present;

		/* no longer there - may have been discarded by another op */
		goto try_next_server_upw;

	present:
		/* dequeue the server, closing the gap in the array; this put
		 * drops the list's ref on it (the caller's ref is dropped
		 * separately below or at try_next_server) */
		volume->nservers--;
		memmove(&volume->servers[loop],
			&volume->servers[loop + 1],
			sizeof(volume->servers[loop]) *
			(volume->nservers - loop));
		volume->servers[volume->nservers] = NULL;
		afs_put_server(server);
		volume->rjservers++;

		if (volume->nservers > 0)
			/* another server might acknowledge its existence */
			goto try_next_server_upw;

		/* handle the case where all the fileservers have rejected the
		 * volume
		 * - TODO: try asking the fileservers for volume information
		 * - TODO: contact the VL server again to see if the volume is
		 *   no longer registered
		 */
		up_write(&volume->server_sem);
		/* drop the caller's ref as well */
		afs_put_server(server);
		_leave(" [completely rejected]");
		return 1;

		/* problem reaching the server */
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
	case -ETIME:
	case -ETIMEDOUT:
	case -EREMOTEIO:
		/* mark the server as dead
		 * TODO: vary dead timeout depending on error
		 */
		spin_lock(&server->fs_lock);
		if (!server->fs_state) {
			server->fs_dead_jif = jiffies + HZ * 10;
			server->fs_state = result;
			printk("kAFS: SERVER DEAD state=%d\n", result);
		}
		spin_unlock(&server->fs_lock);
		goto try_next_server;

		/* miscellaneous error */
	default:
		/* deliberate: default stamps the activity time, then falls
		 * through into the shared accept-the-result handling */
		server->fs_act_jif = jiffies;
		/* fall through */
	case -ENOMEM:
	case -ENONET:
		/* tell the caller to accept the result */
		afs_put_server(server);
		_leave(" [local failure]");
		return 1;
	}

	/* tell the caller to loop around and try the next server */
try_next_server_upw:
	up_write(&volume->server_sem);
try_next_server:
	afs_put_server(server);
	_leave(" [try next server]");
	return 0;
}