volume.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  1. /* volume.c: AFS volume management
  2. *
  3. * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/module.h>
  13. #include <linux/init.h>
  14. #include <linux/slab.h>
  15. #include <linux/fs.h>
  16. #include <linux/pagemap.h>
  17. #include "volume.h"
  18. #include "vnode.h"
  19. #include "cell.h"
  20. #include "cache.h"
  21. #include "cmservice.h"
  22. #include "fsclient.h"
  23. #include "vlclient.h"
  24. #include "internal.h"
  25. #ifdef __KDEBUG
  26. static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
  27. #endif
  28. #ifdef AFS_CACHING_SUPPORT
  29. static cachefs_match_val_t afs_volume_cache_match(void *target,
  30. const void *entry);
  31. static void afs_volume_cache_update(void *source, void *entry);
  32. struct cachefs_index_def afs_volume_cache_index_def = {
  33. .name = "volume",
  34. .data_size = sizeof(struct afs_cache_vhash),
  35. .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 },
  36. .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 },
  37. .match = afs_volume_cache_match,
  38. .update = afs_volume_cache_update,
  39. };
  40. #endif
  41. /*****************************************************************************/
  42. /*
  43. * lookup a volume by name
  44. * - this can be one of the following:
  45. * "%[cell:]volume[.]" R/W volume
  46. * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
  47. * or R/W (rwparent=1) volume
  48. * "%[cell:]volume.readonly" R/O volume
  49. * "#[cell:]volume.readonly" R/O volume
  50. * "%[cell:]volume.backup" Backup volume
  51. * "#[cell:]volume.backup" Backup volume
  52. *
  53. * The cell name is optional, and defaults to the current cell.
  54. *
  55. * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
  56. * Guide
  57. * - Rule 1: Explicit type suffix forces access of that type or nothing
  58. * (no suffix, then use Rule 2 & 3)
  59. * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
  60. * if not available
  61. * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  62. * explicitly told otherwise
  63. */
  64. int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
  65. struct afs_volume **_volume)
  66. {
  67. struct afs_vlocation *vlocation = NULL;
  68. struct afs_volume *volume = NULL;
  69. afs_voltype_t type;
  70. const char *cellname, *volname, *suffix;
  71. char srvtmask;
  72. int force, ret, loop, cellnamesz, volnamesz;
  73. _enter("%s,,%d,", name, rwpath);
  74. if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) {
  75. printk("kAFS: unparsable volume name\n");
  76. return -EINVAL;
  77. }
  78. /* determine the type of volume we're looking for */
  79. force = 0;
  80. type = AFSVL_ROVOL;
  81. if (rwpath || name[0] == '%') {
  82. type = AFSVL_RWVOL;
  83. force = 1;
  84. }
  85. suffix = strrchr(name, '.');
  86. if (suffix) {
  87. if (strcmp(suffix, ".readonly") == 0) {
  88. type = AFSVL_ROVOL;
  89. force = 1;
  90. }
  91. else if (strcmp(suffix, ".backup") == 0) {
  92. type = AFSVL_BACKVOL;
  93. force = 1;
  94. }
  95. else if (suffix[1] == 0) {
  96. }
  97. else {
  98. suffix = NULL;
  99. }
  100. }
  101. /* split the cell and volume names */
  102. name++;
  103. volname = strchr(name, ':');
  104. if (volname) {
  105. cellname = name;
  106. cellnamesz = volname - name;
  107. volname++;
  108. }
  109. else {
  110. volname = name;
  111. cellname = NULL;
  112. cellnamesz = 0;
  113. }
  114. volnamesz = suffix ? suffix - volname : strlen(volname);
  115. _debug("CELL:%*.*s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
  116. cellnamesz, cellnamesz, cellname ?: "", cell,
  117. volnamesz, volnamesz, volname, suffix ?: "-",
  118. type,
  119. force ? " FORCE" : "");
  120. /* lookup the cell record */
  121. if (cellname || !cell) {
  122. ret = afs_cell_lookup(cellname, cellnamesz, &cell);
  123. if (ret<0) {
  124. printk("kAFS: unable to lookup cell '%s'\n",
  125. cellname ?: "");
  126. goto error;
  127. }
  128. }
  129. else {
  130. afs_get_cell(cell);
  131. }
  132. /* lookup the volume location record */
  133. ret = afs_vlocation_lookup(cell, volname, volnamesz, &vlocation);
  134. if (ret < 0)
  135. goto error;
  136. /* make the final decision on the type we want */
  137. ret = -ENOMEDIUM;
  138. if (force && !(vlocation->vldb.vidmask & (1 << type)))
  139. goto error;
  140. srvtmask = 0;
  141. for (loop = 0; loop < vlocation->vldb.nservers; loop++)
  142. srvtmask |= vlocation->vldb.srvtmask[loop];
  143. if (force) {
  144. if (!(srvtmask & (1 << type)))
  145. goto error;
  146. }
  147. else if (srvtmask & AFS_VOL_VTM_RO) {
  148. type = AFSVL_ROVOL;
  149. }
  150. else if (srvtmask & AFS_VOL_VTM_RW) {
  151. type = AFSVL_RWVOL;
  152. }
  153. else {
  154. goto error;
  155. }
  156. down_write(&cell->vl_sem);
  157. /* is the volume already active? */
  158. if (vlocation->vols[type]) {
  159. /* yes - re-use it */
  160. volume = vlocation->vols[type];
  161. afs_get_volume(volume);
  162. goto success;
  163. }
  164. /* create a new volume record */
  165. _debug("creating new volume record");
  166. ret = -ENOMEM;
  167. volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
  168. if (!volume)
  169. goto error_up;
  170. atomic_set(&volume->usage, 1);
  171. volume->type = type;
  172. volume->type_force = force;
  173. volume->cell = cell;
  174. volume->vid = vlocation->vldb.vid[type];
  175. init_rwsem(&volume->server_sem);
  176. /* look up all the applicable server records */
  177. for (loop = 0; loop < 8; loop++) {
  178. if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
  179. ret = afs_server_lookup(
  180. volume->cell,
  181. &vlocation->vldb.servers[loop],
  182. &volume->servers[volume->nservers]);
  183. if (ret < 0)
  184. goto error_discard;
  185. volume->nservers++;
  186. }
  187. }
  188. /* attach the cache and volume location */
  189. #ifdef AFS_CACHING_SUPPORT
  190. cachefs_acquire_cookie(vlocation->cache,
  191. &afs_vnode_cache_index_def,
  192. volume,
  193. &volume->cache);
  194. #endif
  195. afs_get_vlocation(vlocation);
  196. volume->vlocation = vlocation;
  197. vlocation->vols[type] = volume;
  198. success:
  199. _debug("kAFS selected %s volume %08x",
  200. afs_voltypes[volume->type], volume->vid);
  201. *_volume = volume;
  202. ret = 0;
  203. /* clean up */
  204. error_up:
  205. up_write(&cell->vl_sem);
  206. error:
  207. afs_put_vlocation(vlocation);
  208. afs_put_cell(cell);
  209. _leave(" = %d (%p)", ret, volume);
  210. return ret;
  211. error_discard:
  212. up_write(&cell->vl_sem);
  213. for (loop = volume->nservers - 1; loop >= 0; loop--)
  214. afs_put_server(volume->servers[loop]);
  215. kfree(volume);
  216. goto error;
  217. } /* end afs_volume_lookup() */
  218. /*****************************************************************************/
  219. /*
  220. * destroy a volume record
  221. */
  222. void afs_put_volume(struct afs_volume *volume)
  223. {
  224. struct afs_vlocation *vlocation;
  225. int loop;
  226. if (!volume)
  227. return;
  228. _enter("%p", volume);
  229. vlocation = volume->vlocation;
  230. /* sanity check */
  231. BUG_ON(atomic_read(&volume->usage) <= 0);
  232. /* to prevent a race, the decrement and the dequeue must be effectively
  233. * atomic */
  234. down_write(&vlocation->cell->vl_sem);
  235. if (likely(!atomic_dec_and_test(&volume->usage))) {
  236. up_write(&vlocation->cell->vl_sem);
  237. _leave("");
  238. return;
  239. }
  240. vlocation->vols[volume->type] = NULL;
  241. up_write(&vlocation->cell->vl_sem);
  242. /* finish cleaning up the volume */
  243. #ifdef AFS_CACHING_SUPPORT
  244. cachefs_relinquish_cookie(volume->cache, 0);
  245. #endif
  246. afs_put_vlocation(vlocation);
  247. for (loop = volume->nservers - 1; loop >= 0; loop--)
  248. afs_put_server(volume->servers[loop]);
  249. kfree(volume);
  250. _leave(" [destroyed]");
  251. } /* end afs_put_volume() */
  252. /*****************************************************************************/
  253. /*
  254. * pick a server to use to try accessing this volume
  255. * - returns with an elevated usage count on the server chosen
  256. */
  257. int afs_volume_pick_fileserver(struct afs_volume *volume,
  258. struct afs_server **_server)
  259. {
  260. struct afs_server *server;
  261. int ret, state, loop;
  262. _enter("%s", volume->vlocation->vldb.name);
  263. down_read(&volume->server_sem);
  264. /* handle the no-server case */
  265. if (volume->nservers == 0) {
  266. ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
  267. up_read(&volume->server_sem);
  268. _leave(" = %d [no servers]", ret);
  269. return ret;
  270. }
  271. /* basically, just search the list for the first live server and use
  272. * that */
  273. ret = 0;
  274. for (loop = 0; loop < volume->nservers; loop++) {
  275. server = volume->servers[loop];
  276. state = server->fs_state;
  277. switch (state) {
  278. /* found an apparently healthy server */
  279. case 0:
  280. afs_get_server(server);
  281. up_read(&volume->server_sem);
  282. *_server = server;
  283. _leave(" = 0 (picked %08x)",
  284. ntohl(server->addr.s_addr));
  285. return 0;
  286. case -ENETUNREACH:
  287. if (ret == 0)
  288. ret = state;
  289. break;
  290. case -EHOSTUNREACH:
  291. if (ret == 0 ||
  292. ret == -ENETUNREACH)
  293. ret = state;
  294. break;
  295. case -ECONNREFUSED:
  296. if (ret == 0 ||
  297. ret == -ENETUNREACH ||
  298. ret == -EHOSTUNREACH)
  299. ret = state;
  300. break;
  301. default:
  302. case -EREMOTEIO:
  303. if (ret == 0 ||
  304. ret == -ENETUNREACH ||
  305. ret == -EHOSTUNREACH ||
  306. ret == -ECONNREFUSED)
  307. ret = state;
  308. break;
  309. }
  310. }
  311. /* no available servers
  312. * - TODO: handle the no active servers case better
  313. */
  314. up_read(&volume->server_sem);
  315. _leave(" = %d", ret);
  316. return ret;
  317. } /* end afs_volume_pick_fileserver() */
  318. /*****************************************************************************/
  319. /*
  320. * release a server after use
  321. * - releases the ref on the server struct that was acquired by picking
  322. * - records result of using a particular server to access a volume
  323. * - return 0 to try again, 1 if okay or to issue error
  324. */
  325. int afs_volume_release_fileserver(struct afs_volume *volume,
  326. struct afs_server *server,
  327. int result)
  328. {
  329. unsigned loop;
  330. _enter("%s,%08x,%d",
  331. volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
  332. result);
  333. switch (result) {
  334. /* success */
  335. case 0:
  336. server->fs_act_jif = jiffies;
  337. break;
  338. /* the fileserver denied all knowledge of the volume */
  339. case -ENOMEDIUM:
  340. server->fs_act_jif = jiffies;
  341. down_write(&volume->server_sem);
  342. /* first, find where the server is in the active list (if it
  343. * is) */
  344. for (loop = 0; loop < volume->nservers; loop++)
  345. if (volume->servers[loop] == server)
  346. goto present;
  347. /* no longer there - may have been discarded by another op */
  348. goto try_next_server_upw;
  349. present:
  350. volume->nservers--;
  351. memmove(&volume->servers[loop],
  352. &volume->servers[loop + 1],
  353. sizeof(volume->servers[loop]) *
  354. (volume->nservers - loop));
  355. volume->servers[volume->nservers] = NULL;
  356. afs_put_server(server);
  357. volume->rjservers++;
  358. if (volume->nservers > 0)
  359. /* another server might acknowledge its existence */
  360. goto try_next_server_upw;
  361. /* handle the case where all the fileservers have rejected the
  362. * volume
  363. * - TODO: try asking the fileservers for volume information
  364. * - TODO: contact the VL server again to see if the volume is
  365. * no longer registered
  366. */
  367. up_write(&volume->server_sem);
  368. afs_put_server(server);
  369. _leave(" [completely rejected]");
  370. return 1;
  371. /* problem reaching the server */
  372. case -ENETUNREACH:
  373. case -EHOSTUNREACH:
  374. case -ECONNREFUSED:
  375. case -ETIMEDOUT:
  376. case -EREMOTEIO:
  377. /* mark the server as dead
  378. * TODO: vary dead timeout depending on error
  379. */
  380. spin_lock(&server->fs_lock);
  381. if (!server->fs_state) {
  382. server->fs_dead_jif = jiffies + HZ * 10;
  383. server->fs_state = result;
  384. printk("kAFS: SERVER DEAD state=%d\n", result);
  385. }
  386. spin_unlock(&server->fs_lock);
  387. goto try_next_server;
  388. /* miscellaneous error */
  389. default:
  390. server->fs_act_jif = jiffies;
  391. case -ENOMEM:
  392. case -ENONET:
  393. break;
  394. }
  395. /* tell the caller to accept the result */
  396. afs_put_server(server);
  397. _leave("");
  398. return 1;
  399. /* tell the caller to loop around and try the next server */
  400. try_next_server_upw:
  401. up_write(&volume->server_sem);
  402. try_next_server:
  403. afs_put_server(server);
  404. _leave(" [try next server]");
  405. return 0;
  406. } /* end afs_volume_release_fileserver() */
  407. /*****************************************************************************/
  408. /*
  409. * match a volume hash record stored in the cache
  410. */
  411. #ifdef AFS_CACHING_SUPPORT
  412. static cachefs_match_val_t afs_volume_cache_match(void *target,
  413. const void *entry)
  414. {
  415. const struct afs_cache_vhash *vhash = entry;
  416. struct afs_volume *volume = target;
  417. _enter("{%u},{%u}", volume->type, vhash->vtype);
  418. if (volume->type == vhash->vtype) {
  419. _leave(" = SUCCESS");
  420. return CACHEFS_MATCH_SUCCESS;
  421. }
  422. _leave(" = FAILED");
  423. return CACHEFS_MATCH_FAILED;
  424. } /* end afs_volume_cache_match() */
  425. #endif
  426. /*****************************************************************************/
  427. /*
  428. * update a volume hash record stored in the cache
  429. */
  430. #ifdef AFS_CACHING_SUPPORT
  431. static void afs_volume_cache_update(void *source, void *entry)
  432. {
  433. struct afs_cache_vhash *vhash = entry;
  434. struct afs_volume *volume = source;
  435. _enter("");
  436. vhash->vtype = volume->type;
  437. } /* end afs_volume_cache_update() */
  438. #endif