volume.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. /* volume.c: AFS volume management
  2. *
  3. * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/module.h>
  13. #include <linux/init.h>
  14. #include <linux/slab.h>
  15. #include <linux/fs.h>
  16. #include <linux/pagemap.h>
  17. #include "volume.h"
  18. #include "vnode.h"
  19. #include "cell.h"
  20. #include "cache.h"
  21. #include "cmservice.h"
  22. #include "fsclient.h"
  23. #include "vlclient.h"
  24. #include "internal.h"
  25. #ifdef __KDEBUG
  26. static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
  27. #endif
  28. #ifdef AFS_CACHING_SUPPORT
  29. static cachefs_match_val_t afs_volume_cache_match(void *target,
  30. const void *entry);
  31. static void afs_volume_cache_update(void *source, void *entry);
  32. struct cachefs_index_def afs_volume_cache_index_def = {
  33. .name = "volume",
  34. .data_size = sizeof(struct afs_cache_vhash),
  35. .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 },
  36. .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 },
  37. .match = afs_volume_cache_match,
  38. .update = afs_volume_cache_update,
  39. };
  40. #endif
  41. /*****************************************************************************/
  42. /*
  43. * lookup a volume by name
  44. * - this can be one of the following:
  45. * "%[cell:]volume[.]" R/W volume
  46. * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
  47. * or R/W (rwparent=1) volume
  48. * "%[cell:]volume.readonly" R/O volume
  49. * "#[cell:]volume.readonly" R/O volume
  50. * "%[cell:]volume.backup" Backup volume
  51. * "#[cell:]volume.backup" Backup volume
  52. *
  53. * The cell name is optional, and defaults to the current cell.
  54. *
  55. * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
  56. * Guide
  57. * - Rule 1: Explicit type suffix forces access of that type or nothing
  58. * (no suffix, then use Rule 2 & 3)
  59. * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
  60. * if not available
  61. * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  62. * explicitly told otherwise
  63. */
  64. int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
  65. struct afs_volume **_volume)
  66. {
  67. struct afs_vlocation *vlocation = NULL;
  68. struct afs_volume *volume = NULL;
  69. afs_voltype_t type;
  70. const char *cellname, *volname, *suffix;
  71. char srvtmask;
  72. int force, ret, loop, cellnamesz, volnamesz;
  73. _enter("%s,,%d,", name, rwpath);
  74. if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) {
  75. printk("kAFS: unparsable volume name\n");
  76. return -EINVAL;
  77. }
  78. /* determine the type of volume we're looking for */
  79. force = 0;
  80. type = AFSVL_ROVOL;
  81. if (rwpath || name[0] == '%') {
  82. type = AFSVL_RWVOL;
  83. force = 1;
  84. }
  85. suffix = strrchr(name, '.');
  86. if (suffix) {
  87. if (strcmp(suffix, ".readonly") == 0) {
  88. type = AFSVL_ROVOL;
  89. force = 1;
  90. }
  91. else if (strcmp(suffix, ".backup") == 0) {
  92. type = AFSVL_BACKVOL;
  93. force = 1;
  94. }
  95. else if (suffix[1] == 0) {
  96. }
  97. else {
  98. suffix = NULL;
  99. }
  100. }
  101. /* split the cell and volume names */
  102. name++;
  103. volname = strchr(name, ':');
  104. if (volname) {
  105. cellname = name;
  106. cellnamesz = volname - name;
  107. volname++;
  108. }
  109. else {
  110. volname = name;
  111. cellname = NULL;
  112. cellnamesz = 0;
  113. }
  114. volnamesz = suffix ? suffix - volname : strlen(volname);
  115. _debug("CELL:%*.*s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
  116. cellnamesz, cellnamesz, cellname ?: "", cell,
  117. volnamesz, volnamesz, volname, suffix ?: "-",
  118. type,
  119. force ? " FORCE" : "");
  120. /* lookup the cell record */
  121. if (cellname || !cell) {
  122. ret = afs_cell_lookup(cellname, cellnamesz, &cell);
  123. if (ret<0) {
  124. printk("kAFS: unable to lookup cell '%s'\n",
  125. cellname ?: "");
  126. goto error;
  127. }
  128. }
  129. else {
  130. afs_get_cell(cell);
  131. }
  132. /* lookup the volume location record */
  133. ret = afs_vlocation_lookup(cell, volname, volnamesz, &vlocation);
  134. if (ret < 0)
  135. goto error;
  136. /* make the final decision on the type we want */
  137. ret = -ENOMEDIUM;
  138. if (force && !(vlocation->vldb.vidmask & (1 << type)))
  139. goto error;
  140. srvtmask = 0;
  141. for (loop = 0; loop < vlocation->vldb.nservers; loop++)
  142. srvtmask |= vlocation->vldb.srvtmask[loop];
  143. if (force) {
  144. if (!(srvtmask & (1 << type)))
  145. goto error;
  146. }
  147. else if (srvtmask & AFS_VOL_VTM_RO) {
  148. type = AFSVL_ROVOL;
  149. }
  150. else if (srvtmask & AFS_VOL_VTM_RW) {
  151. type = AFSVL_RWVOL;
  152. }
  153. else {
  154. goto error;
  155. }
  156. down_write(&cell->vl_sem);
  157. /* is the volume already active? */
  158. if (vlocation->vols[type]) {
  159. /* yes - re-use it */
  160. volume = vlocation->vols[type];
  161. afs_get_volume(volume);
  162. goto success;
  163. }
  164. /* create a new volume record */
  165. _debug("creating new volume record");
  166. ret = -ENOMEM;
  167. volume = kmalloc(sizeof(struct afs_volume), GFP_KERNEL);
  168. if (!volume)
  169. goto error_up;
  170. memset(volume, 0, sizeof(struct afs_volume));
  171. atomic_set(&volume->usage, 1);
  172. volume->type = type;
  173. volume->type_force = force;
  174. volume->cell = cell;
  175. volume->vid = vlocation->vldb.vid[type];
  176. init_rwsem(&volume->server_sem);
  177. /* look up all the applicable server records */
  178. for (loop = 0; loop < 8; loop++) {
  179. if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
  180. ret = afs_server_lookup(
  181. volume->cell,
  182. &vlocation->vldb.servers[loop],
  183. &volume->servers[volume->nservers]);
  184. if (ret < 0)
  185. goto error_discard;
  186. volume->nservers++;
  187. }
  188. }
  189. /* attach the cache and volume location */
  190. #ifdef AFS_CACHING_SUPPORT
  191. cachefs_acquire_cookie(vlocation->cache,
  192. &afs_vnode_cache_index_def,
  193. volume,
  194. &volume->cache);
  195. #endif
  196. afs_get_vlocation(vlocation);
  197. volume->vlocation = vlocation;
  198. vlocation->vols[type] = volume;
  199. success:
  200. _debug("kAFS selected %s volume %08x",
  201. afs_voltypes[volume->type], volume->vid);
  202. *_volume = volume;
  203. ret = 0;
  204. /* clean up */
  205. error_up:
  206. up_write(&cell->vl_sem);
  207. error:
  208. afs_put_vlocation(vlocation);
  209. afs_put_cell(cell);
  210. _leave(" = %d (%p)", ret, volume);
  211. return ret;
  212. error_discard:
  213. up_write(&cell->vl_sem);
  214. for (loop = volume->nservers - 1; loop >= 0; loop--)
  215. afs_put_server(volume->servers[loop]);
  216. kfree(volume);
  217. goto error;
  218. } /* end afs_volume_lookup() */
  219. /*****************************************************************************/
  220. /*
  221. * destroy a volume record
  222. */
  223. void afs_put_volume(struct afs_volume *volume)
  224. {
  225. struct afs_vlocation *vlocation;
  226. int loop;
  227. if (!volume)
  228. return;
  229. _enter("%p", volume);
  230. vlocation = volume->vlocation;
  231. /* sanity check */
  232. BUG_ON(atomic_read(&volume->usage) <= 0);
  233. /* to prevent a race, the decrement and the dequeue must be effectively
  234. * atomic */
  235. down_write(&vlocation->cell->vl_sem);
  236. if (likely(!atomic_dec_and_test(&volume->usage))) {
  237. up_write(&vlocation->cell->vl_sem);
  238. _leave("");
  239. return;
  240. }
  241. vlocation->vols[volume->type] = NULL;
  242. up_write(&vlocation->cell->vl_sem);
  243. /* finish cleaning up the volume */
  244. #ifdef AFS_CACHING_SUPPORT
  245. cachefs_relinquish_cookie(volume->cache, 0);
  246. #endif
  247. afs_put_vlocation(vlocation);
  248. for (loop = volume->nservers - 1; loop >= 0; loop--)
  249. afs_put_server(volume->servers[loop]);
  250. kfree(volume);
  251. _leave(" [destroyed]");
  252. } /* end afs_put_volume() */
  253. /*****************************************************************************/
  254. /*
  255. * pick a server to use to try accessing this volume
  256. * - returns with an elevated usage count on the server chosen
  257. */
  258. int afs_volume_pick_fileserver(struct afs_volume *volume,
  259. struct afs_server **_server)
  260. {
  261. struct afs_server *server;
  262. int ret, state, loop;
  263. _enter("%s", volume->vlocation->vldb.name);
  264. down_read(&volume->server_sem);
  265. /* handle the no-server case */
  266. if (volume->nservers == 0) {
  267. ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
  268. up_read(&volume->server_sem);
  269. _leave(" = %d [no servers]", ret);
  270. return ret;
  271. }
  272. /* basically, just search the list for the first live server and use
  273. * that */
  274. ret = 0;
  275. for (loop = 0; loop < volume->nservers; loop++) {
  276. server = volume->servers[loop];
  277. state = server->fs_state;
  278. switch (state) {
  279. /* found an apparently healthy server */
  280. case 0:
  281. afs_get_server(server);
  282. up_read(&volume->server_sem);
  283. *_server = server;
  284. _leave(" = 0 (picked %08x)",
  285. ntohl(server->addr.s_addr));
  286. return 0;
  287. case -ENETUNREACH:
  288. if (ret == 0)
  289. ret = state;
  290. break;
  291. case -EHOSTUNREACH:
  292. if (ret == 0 ||
  293. ret == -ENETUNREACH)
  294. ret = state;
  295. break;
  296. case -ECONNREFUSED:
  297. if (ret == 0 ||
  298. ret == -ENETUNREACH ||
  299. ret == -EHOSTUNREACH)
  300. ret = state;
  301. break;
  302. default:
  303. case -EREMOTEIO:
  304. if (ret == 0 ||
  305. ret == -ENETUNREACH ||
  306. ret == -EHOSTUNREACH ||
  307. ret == -ECONNREFUSED)
  308. ret = state;
  309. break;
  310. }
  311. }
  312. /* no available servers
  313. * - TODO: handle the no active servers case better
  314. */
  315. up_read(&volume->server_sem);
  316. _leave(" = %d", ret);
  317. return ret;
  318. } /* end afs_volume_pick_fileserver() */
  319. /*****************************************************************************/
  320. /*
  321. * release a server after use
  322. * - releases the ref on the server struct that was acquired by picking
  323. * - records result of using a particular server to access a volume
  324. * - return 0 to try again, 1 if okay or to issue error
  325. */
  326. int afs_volume_release_fileserver(struct afs_volume *volume,
  327. struct afs_server *server,
  328. int result)
  329. {
  330. unsigned loop;
  331. _enter("%s,%08x,%d",
  332. volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
  333. result);
  334. switch (result) {
  335. /* success */
  336. case 0:
  337. server->fs_act_jif = jiffies;
  338. break;
  339. /* the fileserver denied all knowledge of the volume */
  340. case -ENOMEDIUM:
  341. server->fs_act_jif = jiffies;
  342. down_write(&volume->server_sem);
  343. /* first, find where the server is in the active list (if it
  344. * is) */
  345. for (loop = 0; loop < volume->nservers; loop++)
  346. if (volume->servers[loop] == server)
  347. goto present;
  348. /* no longer there - may have been discarded by another op */
  349. goto try_next_server_upw;
  350. present:
  351. volume->nservers--;
  352. memmove(&volume->servers[loop],
  353. &volume->servers[loop + 1],
  354. sizeof(volume->servers[loop]) *
  355. (volume->nservers - loop));
  356. volume->servers[volume->nservers] = NULL;
  357. afs_put_server(server);
  358. volume->rjservers++;
  359. if (volume->nservers > 0)
  360. /* another server might acknowledge its existence */
  361. goto try_next_server_upw;
  362. /* handle the case where all the fileservers have rejected the
  363. * volume
  364. * - TODO: try asking the fileservers for volume information
  365. * - TODO: contact the VL server again to see if the volume is
  366. * no longer registered
  367. */
  368. up_write(&volume->server_sem);
  369. afs_put_server(server);
  370. _leave(" [completely rejected]");
  371. return 1;
  372. /* problem reaching the server */
  373. case -ENETUNREACH:
  374. case -EHOSTUNREACH:
  375. case -ECONNREFUSED:
  376. case -ETIMEDOUT:
  377. case -EREMOTEIO:
  378. /* mark the server as dead
  379. * TODO: vary dead timeout depending on error
  380. */
  381. spin_lock(&server->fs_lock);
  382. if (!server->fs_state) {
  383. server->fs_dead_jif = jiffies + HZ * 10;
  384. server->fs_state = result;
  385. printk("kAFS: SERVER DEAD state=%d\n", result);
  386. }
  387. spin_unlock(&server->fs_lock);
  388. goto try_next_server;
  389. /* miscellaneous error */
  390. default:
  391. server->fs_act_jif = jiffies;
  392. case -ENOMEM:
  393. case -ENONET:
  394. break;
  395. }
  396. /* tell the caller to accept the result */
  397. afs_put_server(server);
  398. _leave("");
  399. return 1;
  400. /* tell the caller to loop around and try the next server */
  401. try_next_server_upw:
  402. up_write(&volume->server_sem);
  403. try_next_server:
  404. afs_put_server(server);
  405. _leave(" [try next server]");
  406. return 0;
  407. } /* end afs_volume_release_fileserver() */
  408. /*****************************************************************************/
  409. /*
  410. * match a volume hash record stored in the cache
  411. */
  412. #ifdef AFS_CACHING_SUPPORT
  413. static cachefs_match_val_t afs_volume_cache_match(void *target,
  414. const void *entry)
  415. {
  416. const struct afs_cache_vhash *vhash = entry;
  417. struct afs_volume *volume = target;
  418. _enter("{%u},{%u}", volume->type, vhash->vtype);
  419. if (volume->type == vhash->vtype) {
  420. _leave(" = SUCCESS");
  421. return CACHEFS_MATCH_SUCCESS;
  422. }
  423. _leave(" = FAILED");
  424. return CACHEFS_MATCH_FAILED;
  425. } /* end afs_volume_cache_match() */
  426. #endif
  427. /*****************************************************************************/
  428. /*
  429. * update a volume hash record stored in the cache
  430. */
  431. #ifdef AFS_CACHING_SUPPORT
  432. static void afs_volume_cache_update(void *source, void *entry)
  433. {
  434. struct afs_cache_vhash *vhash = entry;
  435. struct afs_volume *volume = source;
  436. _enter("");
  437. vhash->vtype = volume->type;
  438. } /* end afs_volume_cache_update() */
  439. #endif