volume.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. /* AFS volume management
  2. *
  3. * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/module.h>
  13. #include <linux/init.h>
  14. #include <linux/slab.h>
  15. #include <linux/fs.h>
  16. #include <linux/pagemap.h>
  17. #include "volume.h"
  18. #include "vnode.h"
  19. #include "cell.h"
  20. #include "cache.h"
  21. #include "cmservice.h"
  22. #include "fsclient.h"
  23. #include "vlclient.h"
  24. #include "internal.h"
  25. #ifdef __KDEBUG
  26. static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
  27. #endif
  28. #ifdef AFS_CACHING_SUPPORT
  29. static cachefs_match_val_t afs_volume_cache_match(void *target,
  30. const void *entry);
  31. static void afs_volume_cache_update(void *source, void *entry);
  32. struct cachefs_index_def afs_volume_cache_index_def = {
  33. .name = "volume",
  34. .data_size = sizeof(struct afs_cache_vhash),
  35. .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 },
  36. .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 },
  37. .match = afs_volume_cache_match,
  38. .update = afs_volume_cache_update,
  39. };
  40. #endif
  41. /*
  42. * lookup a volume by name
  43. * - this can be one of the following:
  44. * "%[cell:]volume[.]" R/W volume
  45. * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
  46. * or R/W (rwparent=1) volume
  47. * "%[cell:]volume.readonly" R/O volume
  48. * "#[cell:]volume.readonly" R/O volume
  49. * "%[cell:]volume.backup" Backup volume
  50. * "#[cell:]volume.backup" Backup volume
  51. *
  52. * The cell name is optional, and defaults to the current cell.
  53. *
  54. * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
  55. * Guide
  56. * - Rule 1: Explicit type suffix forces access of that type or nothing
  57. * (no suffix, then use Rule 2 & 3)
  58. * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
  59. * if not available
  60. * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  61. * explicitly told otherwise
  62. */
  63. int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
  64. struct afs_volume **_volume)
  65. {
  66. struct afs_vlocation *vlocation = NULL;
  67. struct afs_volume *volume = NULL;
  68. afs_voltype_t type;
  69. const char *cellname, *volname, *suffix;
  70. char srvtmask;
  71. int force, ret, loop, cellnamesz, volnamesz;
  72. _enter("%s,,%d,", name, rwpath);
  73. if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) {
  74. printk("kAFS: unparsable volume name\n");
  75. return -EINVAL;
  76. }
  77. /* determine the type of volume we're looking for */
  78. force = 0;
  79. type = AFSVL_ROVOL;
  80. if (rwpath || name[0] == '%') {
  81. type = AFSVL_RWVOL;
  82. force = 1;
  83. }
  84. suffix = strrchr(name, '.');
  85. if (suffix) {
  86. if (strcmp(suffix, ".readonly") == 0) {
  87. type = AFSVL_ROVOL;
  88. force = 1;
  89. } else if (strcmp(suffix, ".backup") == 0) {
  90. type = AFSVL_BACKVOL;
  91. force = 1;
  92. } else if (suffix[1] == 0) {
  93. } else {
  94. suffix = NULL;
  95. }
  96. }
  97. /* split the cell and volume names */
  98. name++;
  99. volname = strchr(name, ':');
  100. if (volname) {
  101. cellname = name;
  102. cellnamesz = volname - name;
  103. volname++;
  104. } else {
  105. volname = name;
  106. cellname = NULL;
  107. cellnamesz = 0;
  108. }
  109. volnamesz = suffix ? suffix - volname : strlen(volname);
  110. _debug("CELL:%*.*s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
  111. cellnamesz, cellnamesz, cellname ?: "", cell,
  112. volnamesz, volnamesz, volname, suffix ?: "-",
  113. type,
  114. force ? " FORCE" : "");
  115. /* lookup the cell record */
  116. if (cellname || !cell) {
  117. ret = afs_cell_lookup(cellname, cellnamesz, &cell);
  118. if (ret<0) {
  119. printk("kAFS: unable to lookup cell '%s'\n",
  120. cellname ?: "");
  121. goto error;
  122. }
  123. } else {
  124. afs_get_cell(cell);
  125. }
  126. /* lookup the volume location record */
  127. ret = afs_vlocation_lookup(cell, volname, volnamesz, &vlocation);
  128. if (ret < 0)
  129. goto error;
  130. /* make the final decision on the type we want */
  131. ret = -ENOMEDIUM;
  132. if (force && !(vlocation->vldb.vidmask & (1 << type)))
  133. goto error;
  134. srvtmask = 0;
  135. for (loop = 0; loop < vlocation->vldb.nservers; loop++)
  136. srvtmask |= vlocation->vldb.srvtmask[loop];
  137. if (force) {
  138. if (!(srvtmask & (1 << type)))
  139. goto error;
  140. } else if (srvtmask & AFS_VOL_VTM_RO) {
  141. type = AFSVL_ROVOL;
  142. } else if (srvtmask & AFS_VOL_VTM_RW) {
  143. type = AFSVL_RWVOL;
  144. } else {
  145. goto error;
  146. }
  147. down_write(&cell->vl_sem);
  148. /* is the volume already active? */
  149. if (vlocation->vols[type]) {
  150. /* yes - re-use it */
  151. volume = vlocation->vols[type];
  152. afs_get_volume(volume);
  153. goto success;
  154. }
  155. /* create a new volume record */
  156. _debug("creating new volume record");
  157. ret = -ENOMEM;
  158. volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
  159. if (!volume)
  160. goto error_up;
  161. atomic_set(&volume->usage, 1);
  162. volume->type = type;
  163. volume->type_force = force;
  164. volume->cell = cell;
  165. volume->vid = vlocation->vldb.vid[type];
  166. init_rwsem(&volume->server_sem);
  167. /* look up all the applicable server records */
  168. for (loop = 0; loop < 8; loop++) {
  169. if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
  170. ret = afs_server_lookup(
  171. volume->cell,
  172. &vlocation->vldb.servers[loop],
  173. &volume->servers[volume->nservers]);
  174. if (ret < 0)
  175. goto error_discard;
  176. volume->nservers++;
  177. }
  178. }
  179. /* attach the cache and volume location */
  180. #ifdef AFS_CACHING_SUPPORT
  181. cachefs_acquire_cookie(vlocation->cache,
  182. &afs_vnode_cache_index_def,
  183. volume,
  184. &volume->cache);
  185. #endif
  186. afs_get_vlocation(vlocation);
  187. volume->vlocation = vlocation;
  188. vlocation->vols[type] = volume;
  189. success:
  190. _debug("kAFS selected %s volume %08x",
  191. afs_voltypes[volume->type], volume->vid);
  192. *_volume = volume;
  193. ret = 0;
  194. /* clean up */
  195. error_up:
  196. up_write(&cell->vl_sem);
  197. error:
  198. afs_put_vlocation(vlocation);
  199. afs_put_cell(cell);
  200. _leave(" = %d (%p)", ret, volume);
  201. return ret;
  202. error_discard:
  203. up_write(&cell->vl_sem);
  204. for (loop = volume->nservers - 1; loop >= 0; loop--)
  205. afs_put_server(volume->servers[loop]);
  206. kfree(volume);
  207. goto error;
  208. }
  209. /*
  210. * destroy a volume record
  211. */
  212. void afs_put_volume(struct afs_volume *volume)
  213. {
  214. struct afs_vlocation *vlocation;
  215. int loop;
  216. if (!volume)
  217. return;
  218. _enter("%p", volume);
  219. vlocation = volume->vlocation;
  220. /* sanity check */
  221. BUG_ON(atomic_read(&volume->usage) <= 0);
  222. /* to prevent a race, the decrement and the dequeue must be effectively
  223. * atomic */
  224. down_write(&vlocation->cell->vl_sem);
  225. if (likely(!atomic_dec_and_test(&volume->usage))) {
  226. up_write(&vlocation->cell->vl_sem);
  227. _leave("");
  228. return;
  229. }
  230. vlocation->vols[volume->type] = NULL;
  231. up_write(&vlocation->cell->vl_sem);
  232. /* finish cleaning up the volume */
  233. #ifdef AFS_CACHING_SUPPORT
  234. cachefs_relinquish_cookie(volume->cache, 0);
  235. #endif
  236. afs_put_vlocation(vlocation);
  237. for (loop = volume->nservers - 1; loop >= 0; loop--)
  238. afs_put_server(volume->servers[loop]);
  239. kfree(volume);
  240. _leave(" [destroyed]");
  241. }
  242. /*
  243. * pick a server to use to try accessing this volume
  244. * - returns with an elevated usage count on the server chosen
  245. */
  246. int afs_volume_pick_fileserver(struct afs_volume *volume,
  247. struct afs_server **_server)
  248. {
  249. struct afs_server *server;
  250. int ret, state, loop;
  251. _enter("%s", volume->vlocation->vldb.name);
  252. down_read(&volume->server_sem);
  253. /* handle the no-server case */
  254. if (volume->nservers == 0) {
  255. ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
  256. up_read(&volume->server_sem);
  257. _leave(" = %d [no servers]", ret);
  258. return ret;
  259. }
  260. /* basically, just search the list for the first live server and use
  261. * that */
  262. ret = 0;
  263. for (loop = 0; loop < volume->nservers; loop++) {
  264. server = volume->servers[loop];
  265. state = server->fs_state;
  266. switch (state) {
  267. /* found an apparently healthy server */
  268. case 0:
  269. afs_get_server(server);
  270. up_read(&volume->server_sem);
  271. *_server = server;
  272. _leave(" = 0 (picked %08x)",
  273. ntohl(server->addr.s_addr));
  274. return 0;
  275. case -ENETUNREACH:
  276. if (ret == 0)
  277. ret = state;
  278. break;
  279. case -EHOSTUNREACH:
  280. if (ret == 0 ||
  281. ret == -ENETUNREACH)
  282. ret = state;
  283. break;
  284. case -ECONNREFUSED:
  285. if (ret == 0 ||
  286. ret == -ENETUNREACH ||
  287. ret == -EHOSTUNREACH)
  288. ret = state;
  289. break;
  290. default:
  291. case -EREMOTEIO:
  292. if (ret == 0 ||
  293. ret == -ENETUNREACH ||
  294. ret == -EHOSTUNREACH ||
  295. ret == -ECONNREFUSED)
  296. ret = state;
  297. break;
  298. }
  299. }
  300. /* no available servers
  301. * - TODO: handle the no active servers case better
  302. */
  303. up_read(&volume->server_sem);
  304. _leave(" = %d", ret);
  305. return ret;
  306. }
  307. /*
  308. * release a server after use
  309. * - releases the ref on the server struct that was acquired by picking
  310. * - records result of using a particular server to access a volume
  311. * - return 0 to try again, 1 if okay or to issue error
  312. */
  313. int afs_volume_release_fileserver(struct afs_volume *volume,
  314. struct afs_server *server,
  315. int result)
  316. {
  317. unsigned loop;
  318. _enter("%s,%08x,%d",
  319. volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
  320. result);
  321. switch (result) {
  322. /* success */
  323. case 0:
  324. server->fs_act_jif = jiffies;
  325. break;
  326. /* the fileserver denied all knowledge of the volume */
  327. case -ENOMEDIUM:
  328. server->fs_act_jif = jiffies;
  329. down_write(&volume->server_sem);
  330. /* first, find where the server is in the active list (if it
  331. * is) */
  332. for (loop = 0; loop < volume->nservers; loop++)
  333. if (volume->servers[loop] == server)
  334. goto present;
  335. /* no longer there - may have been discarded by another op */
  336. goto try_next_server_upw;
  337. present:
  338. volume->nservers--;
  339. memmove(&volume->servers[loop],
  340. &volume->servers[loop + 1],
  341. sizeof(volume->servers[loop]) *
  342. (volume->nservers - loop));
  343. volume->servers[volume->nservers] = NULL;
  344. afs_put_server(server);
  345. volume->rjservers++;
  346. if (volume->nservers > 0)
  347. /* another server might acknowledge its existence */
  348. goto try_next_server_upw;
  349. /* handle the case where all the fileservers have rejected the
  350. * volume
  351. * - TODO: try asking the fileservers for volume information
  352. * - TODO: contact the VL server again to see if the volume is
  353. * no longer registered
  354. */
  355. up_write(&volume->server_sem);
  356. afs_put_server(server);
  357. _leave(" [completely rejected]");
  358. return 1;
  359. /* problem reaching the server */
  360. case -ENETUNREACH:
  361. case -EHOSTUNREACH:
  362. case -ECONNREFUSED:
  363. case -ETIMEDOUT:
  364. case -EREMOTEIO:
  365. /* mark the server as dead
  366. * TODO: vary dead timeout depending on error
  367. */
  368. spin_lock(&server->fs_lock);
  369. if (!server->fs_state) {
  370. server->fs_dead_jif = jiffies + HZ * 10;
  371. server->fs_state = result;
  372. printk("kAFS: SERVER DEAD state=%d\n", result);
  373. }
  374. spin_unlock(&server->fs_lock);
  375. goto try_next_server;
  376. /* miscellaneous error */
  377. default:
  378. server->fs_act_jif = jiffies;
  379. case -ENOMEM:
  380. case -ENONET:
  381. break;
  382. }
  383. /* tell the caller to accept the result */
  384. afs_put_server(server);
  385. _leave("");
  386. return 1;
  387. /* tell the caller to loop around and try the next server */
  388. try_next_server_upw:
  389. up_write(&volume->server_sem);
  390. try_next_server:
  391. afs_put_server(server);
  392. _leave(" [try next server]");
  393. return 0;
  394. }
  395. /*
  396. * match a volume hash record stored in the cache
  397. */
  398. #ifdef AFS_CACHING_SUPPORT
  399. static cachefs_match_val_t afs_volume_cache_match(void *target,
  400. const void *entry)
  401. {
  402. const struct afs_cache_vhash *vhash = entry;
  403. struct afs_volume *volume = target;
  404. _enter("{%u},{%u}", volume->type, vhash->vtype);
  405. if (volume->type == vhash->vtype) {
  406. _leave(" = SUCCESS");
  407. return CACHEFS_MATCH_SUCCESS;
  408. }
  409. _leave(" = FAILED");
  410. return CACHEFS_MATCH_FAILED;
  411. }
  412. #endif
  413. /*
  414. * update a volume hash record stored in the cache
  415. */
  416. #ifdef AFS_CACHING_SUPPORT
  417. static void afs_volume_cache_update(void *source, void *entry)
  418. {
  419. struct afs_cache_vhash *vhash = entry;
  420. struct afs_volume *volume = source;
  421. _enter("");
  422. vhash->vtype = volume->type;
  423. }
  424. #endif