xattr.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054
  1. #include <linux/ceph/ceph_debug.h>
  2. #include "super.h"
  3. #include "mds_client.h"
  4. #include <linux/ceph/decode.h>
  5. #include <linux/xattr.h>
  6. #include <linux/slab.h>
  /* Name prefix shared by every ceph-specific extended attribute. */
  #define XATTR_CEPH_PREFIX	"ceph."
  /* Length of that prefix, not counting the terminating '\0'. */
  #define XATTR_CEPH_PREFIX_LEN	(sizeof(XATTR_CEPH_PREFIX) - 1)
  9. static bool ceph_is_valid_xattr(const char *name)
  10. {
  11. return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
  12. !strncmp(name, XATTR_SECURITY_PREFIX,
  13. XATTR_SECURITY_PREFIX_LEN) ||
  14. !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  15. !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  16. }
  /*
   * Descriptor for one virtual xattr.  Virtual xattrs expose the
   * recursive directory statistics and layout metadata.
   */
  struct ceph_vxattr {
  	char *name;		/* full attribute name */
  	size_t name_size;	/* strlen(name) + 1 (for '\0') */
  	/* formats the value into val, given a buffer of size bytes */
  	size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  			      size_t size);
  	bool readonly;		/* set/remove requests are refused */
  	bool hidden;		/* not counted or emitted by listxattr */
  	/* optional; if set and it returns false the attr is treated as absent */
  	bool (*exists_cb)(struct ceph_inode_info *ci);
  };
  29. /* layouts */
  30. static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
  31. {
  32. size_t s;
  33. char *p = (char *)&ci->i_layout;
  34. for (s = 0; s < sizeof(ci->i_layout); s++, p++)
  35. if (*p)
  36. return true;
  37. return false;
  38. }
  39. static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  40. size_t size)
  41. {
  42. int ret;
  43. struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  44. struct ceph_osd_client *osdc = &fsc->client->osdc;
  45. s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
  46. const char *pool_name;
  47. dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
  48. down_read(&osdc->map_sem);
  49. pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  50. if (pool_name)
  51. ret = snprintf(val, size,
  52. "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
  53. (unsigned long long)ceph_file_layout_su(ci->i_layout),
  54. (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
  55. (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
  56. pool_name);
  57. else
  58. ret = snprintf(val, size,
  59. "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
  60. (unsigned long long)ceph_file_layout_su(ci->i_layout),
  61. (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
  62. (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
  63. (unsigned long long)pool);
  64. up_read(&osdc->map_sem);
  65. return ret;
  66. }
  67. static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
  68. char *val, size_t size)
  69. {
  70. return snprintf(val, size, "%lld",
  71. (unsigned long long)ceph_file_layout_su(ci->i_layout));
  72. }
  73. static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
  74. char *val, size_t size)
  75. {
  76. return snprintf(val, size, "%lld",
  77. (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
  78. }
  79. static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
  80. char *val, size_t size)
  81. {
  82. return snprintf(val, size, "%lld",
  83. (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
  84. }
  85. static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
  86. char *val, size_t size)
  87. {
  88. int ret;
  89. struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  90. struct ceph_osd_client *osdc = &fsc->client->osdc;
  91. s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
  92. const char *pool_name;
  93. down_read(&osdc->map_sem);
  94. pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  95. if (pool_name)
  96. ret = snprintf(val, size, "%s", pool_name);
  97. else
  98. ret = snprintf(val, size, "%lld", (unsigned long long)pool);
  99. up_read(&osdc->map_sem);
  100. return ret;
  101. }
  102. /* directories */
  103. static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
  104. size_t size)
  105. {
  106. return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
  107. }
  108. static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
  109. size_t size)
  110. {
  111. return snprintf(val, size, "%lld", ci->i_files);
  112. }
  113. static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
  114. size_t size)
  115. {
  116. return snprintf(val, size, "%lld", ci->i_subdirs);
  117. }
  118. static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
  119. size_t size)
  120. {
  121. return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
  122. }
  123. static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
  124. size_t size)
  125. {
  126. return snprintf(val, size, "%lld", ci->i_rfiles);
  127. }
  128. static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
  129. size_t size)
  130. {
  131. return snprintf(val, size, "%lld", ci->i_rsubdirs);
  132. }
  133. static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
  134. size_t size)
  135. {
  136. return snprintf(val, size, "%lld", ci->i_rbytes);
  137. }
  138. static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
  139. size_t size)
  140. {
  141. return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
  142. (long)ci->i_rctime.tv_nsec);
  143. }
  /* Spell "<prefix><type>.<name>" as one string literal. */
  #define CEPH_XATTR_NAME(_type, _name)	\
  	XATTR_CEPH_PREFIX #_type "." #_name
  /* Same, with a third dotted component appended. */
  #define CEPH_XATTR_NAME2(_type, _name, _name2)	\
  	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
  /*
   * Table entry for a read-only, listed virtual xattr named
   * CEPH_XATTR_NAME(_type, _name) whose value comes from
   * ceph_vxattrcb_<_type>_<_name>().  No existence callback.
   */
  #define XATTR_NAME_CEPH(_type, _name)					\
  	{								\
  		.name = CEPH_XATTR_NAME(_type, _name),			\
  		.name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),	\
  		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
  		.readonly = true,					\
  		.hidden = false,					\
  		.exists_cb = NULL,					\
  	}
  /*
   * Table entry for a writable, hidden virtual xattr named
   * CEPH_XATTR_NAME2(_type, _name, _field) whose value comes from
   * ceph_vxattrcb_<_name>_<_field>().  It exists only when
   * ceph_vxattrcb_layout_exists() says so.
   */
  #define XATTR_LAYOUT_FIELD(_type, _name, _field)			\
  	{								\
  		.name = CEPH_XATTR_NAME2(_type, _name, _field),		\
  		.name_size = sizeof(CEPH_XATTR_NAME2(_type, _name, _field)), \
  		.getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field,	\
  		.readonly = false,					\
  		.hidden = true,						\
  		.exists_cb = ceph_vxattrcb_layout_exists,		\
  	}
  165. static struct ceph_vxattr ceph_dir_vxattrs[] = {
  166. {
  167. .name = "ceph.dir.layout",
  168. .name_size = sizeof("ceph.dir.layout"),
  169. .getxattr_cb = ceph_vxattrcb_layout,
  170. .readonly = false,
  171. .hidden = false,
  172. .exists_cb = ceph_vxattrcb_layout_exists,
  173. },
  174. XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
  175. XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
  176. XATTR_LAYOUT_FIELD(dir, layout, object_size),
  177. XATTR_LAYOUT_FIELD(dir, layout, pool),
  178. XATTR_NAME_CEPH(dir, entries),
  179. XATTR_NAME_CEPH(dir, files),
  180. XATTR_NAME_CEPH(dir, subdirs),
  181. XATTR_NAME_CEPH(dir, rentries),
  182. XATTR_NAME_CEPH(dir, rfiles),
  183. XATTR_NAME_CEPH(dir, rsubdirs),
  184. XATTR_NAME_CEPH(dir, rbytes),
  185. XATTR_NAME_CEPH(dir, rctime),
  186. { .name = NULL, 0 } /* Required table terminator */
  187. };
  188. static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
  189. /* files */
  190. static struct ceph_vxattr ceph_file_vxattrs[] = {
  191. {
  192. .name = "ceph.file.layout",
  193. .name_size = sizeof("ceph.file.layout"),
  194. .getxattr_cb = ceph_vxattrcb_layout,
  195. .readonly = false,
  196. .hidden = false,
  197. .exists_cb = ceph_vxattrcb_layout_exists,
  198. },
  199. XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
  200. XATTR_LAYOUT_FIELD(file, layout, stripe_count),
  201. XATTR_LAYOUT_FIELD(file, layout, object_size),
  202. XATTR_LAYOUT_FIELD(file, layout, pool),
  203. { .name = NULL, 0 } /* Required table terminator */
  204. };
  205. static size_t ceph_file_vxattrs_name_size; /* total size of all names */
  206. static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
  207. {
  208. if (S_ISDIR(inode->i_mode))
  209. return ceph_dir_vxattrs;
  210. else if (S_ISREG(inode->i_mode))
  211. return ceph_file_vxattrs;
  212. return NULL;
  213. }
  214. static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
  215. {
  216. if (vxattrs == ceph_dir_vxattrs)
  217. return ceph_dir_vxattrs_name_size;
  218. if (vxattrs == ceph_file_vxattrs)
  219. return ceph_file_vxattrs_name_size;
  220. BUG();
  221. return 0;
  222. }
  223. /*
  224. * Compute the aggregate size (including terminating '\0') of all
  225. * virtual extended attribute names in the given vxattr table.
  226. */
  227. static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
  228. {
  229. struct ceph_vxattr *vxattr;
  230. size_t size = 0;
  231. for (vxattr = vxattrs; vxattr->name; vxattr++)
  232. if (!vxattr->hidden)
  233. size += vxattr->name_size;
  234. return size;
  235. }
  236. /* Routines called at initialization and exit time */
  237. void __init ceph_xattr_init(void)
  238. {
  239. ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
  240. ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
  241. }
  242. void ceph_xattr_exit(void)
  243. {
  244. ceph_dir_vxattrs_name_size = 0;
  245. ceph_file_vxattrs_name_size = 0;
  246. }
  247. static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
  248. const char *name)
  249. {
  250. struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
  251. if (vxattr) {
  252. while (vxattr->name) {
  253. if (!strcmp(vxattr->name, name))
  254. return vxattr;
  255. vxattr++;
  256. }
  257. }
  258. return NULL;
  259. }
  /*
   * Insert or update one entry in ci->i_xattrs.index, the rb-tree of
   * xattrs.  Entries are ordered by comparing the common name prefix and,
   * on a tie, by name length.
   *
   * If no entry with this name exists, *newxattr (preallocated by the
   * caller) is filled in and linked into the tree.  If one exists,
   * *newxattr is freed and set to NULL, the old value is freed when the
   * entry owned it, and the passed-in name is freed when should_free_name
   * is set (the entry keeps its existing name).
   *
   * count, names_size and vals_size in ci->i_xattrs are kept in step.
   * A NULL val is stored as "".  Always returns 0.
   */
  260. static int __set_xattr(struct ceph_inode_info *ci,
  261. const char *name, int name_len,
  262. const char *val, int val_len,
  263. int dirty,
  264. int should_free_name, int should_free_val,
  265. struct ceph_inode_xattr **newxattr)
  266. {
  267. struct rb_node **p;
  268. struct rb_node *parent = NULL;
  269. struct ceph_inode_xattr *xattr = NULL;
  270. int c;
  271. int new = 0;
  272. p = &ci->i_xattrs.index.rb_node;
  /* descend; leaves xattr set only when an exact name match was found */
  273. while (*p) {
  274. parent = *p;
  275. xattr = rb_entry(parent, struct ceph_inode_xattr, node);
  276. c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
  277. if (c < 0)
  278. p = &(*p)->rb_left;
  279. else if (c > 0)
  280. p = &(*p)->rb_right;
  281. else {
  /* common prefix equal: the shorter name sorts first */
  282. if (name_len == xattr->name_len)
  283. break;
  284. else if (name_len < xattr->name_len)
  285. p = &(*p)->rb_left;
  286. else
  287. p = &(*p)->rb_right;
  288. }
  289. xattr = NULL;
  290. }
  291. if (!xattr) {
  /* no match: take over the caller's preallocated node */
  292. new = 1;
  293. xattr = *newxattr;
  294. xattr->name = name;
  295. xattr->name_len = name_len;
  296. xattr->should_free_name = should_free_name;
  297. ci->i_xattrs.count++;
  298. dout("__set_xattr count=%d\n", ci->i_xattrs.count);
  299. } else {
  /* match: the preallocated node is not needed; release old value/dup name */
  300. kfree(*newxattr);
  301. *newxattr = NULL;
  302. if (xattr->should_free_val)
  303. kfree((void *)xattr->val);
  304. if (should_free_name) {
  305. kfree((void *)name);
  306. name = xattr->name;
  307. }
  /* back out the old sizes; the new ones are added just below */
  308. ci->i_xattrs.names_size -= xattr->name_len;
  309. ci->i_xattrs.vals_size -= xattr->val_len;
  310. }
  311. ci->i_xattrs.names_size += name_len;
  312. ci->i_xattrs.vals_size += val_len;
  313. if (val)
  314. xattr->val = val;
  315. else
  316. xattr->val = "";
  317. xattr->val_len = val_len;
  318. xattr->dirty = dirty;
  /* never mark the static "" placeholder as freeable */
  319. xattr->should_free_val = (val && should_free_val);
  320. if (new) {
  321. rb_link_node(&xattr->node, parent, p);
  322. rb_insert_color(&xattr->node, &ci->i_xattrs.index);
  323. dout("__set_xattr_val p=%p\n", p);
  324. }
  325. dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
  326. ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
  327. return 0;
  328. }
  329. static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
  330. const char *name)
  331. {
  332. struct rb_node **p;
  333. struct rb_node *parent = NULL;
  334. struct ceph_inode_xattr *xattr = NULL;
  335. int name_len = strlen(name);
  336. int c;
  337. p = &ci->i_xattrs.index.rb_node;
  338. while (*p) {
  339. parent = *p;
  340. xattr = rb_entry(parent, struct ceph_inode_xattr, node);
  341. c = strncmp(name, xattr->name, xattr->name_len);
  342. if (c == 0 && name_len > xattr->name_len)
  343. c = 1;
  344. if (c < 0)
  345. p = &(*p)->rb_left;
  346. else if (c > 0)
  347. p = &(*p)->rb_right;
  348. else {
  349. dout("__get_xattr %s: found %.*s\n", name,
  350. xattr->val_len, xattr->val);
  351. return xattr;
  352. }
  353. }
  354. dout("__get_xattr %s: not found\n", name);
  355. return NULL;
  356. }
  357. static void __free_xattr(struct ceph_inode_xattr *xattr)
  358. {
  359. BUG_ON(!xattr);
  360. if (xattr->should_free_name)
  361. kfree((void *)xattr->name);
  362. if (xattr->should_free_val)
  363. kfree((void *)xattr->val);
  364. kfree(xattr);
  365. }
  366. static int __remove_xattr(struct ceph_inode_info *ci,
  367. struct ceph_inode_xattr *xattr)
  368. {
  369. if (!xattr)
  370. return -EOPNOTSUPP;
  371. rb_erase(&xattr->node, &ci->i_xattrs.index);
  372. if (xattr->should_free_name)
  373. kfree((void *)xattr->name);
  374. if (xattr->should_free_val)
  375. kfree((void *)xattr->val);
  376. ci->i_xattrs.names_size -= xattr->name_len;
  377. ci->i_xattrs.vals_size -= xattr->val_len;
  378. ci->i_xattrs.count--;
  379. kfree(xattr);
  380. return 0;
  381. }
  /*
   * Remove the xattr called @name from the index of @ci.
   *
   * Returns whatever __remove_xattr() returns for the entry found by
   * __get_xattr(), which may be NULL.
   *
   * The former local 'p' was assigned but never read, and 'err' only
   * relayed the return value; both are gone.
   */
  static int __remove_xattr_by_name(struct ceph_inode_info *ci,
  				  const char *name)
  {
  	return __remove_xattr(ci, __get_xattr(ci, name));
  }
  393. static char *__copy_xattr_names(struct ceph_inode_info *ci,
  394. char *dest)
  395. {
  396. struct rb_node *p;
  397. struct ceph_inode_xattr *xattr = NULL;
  398. p = rb_first(&ci->i_xattrs.index);
  399. dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
  400. while (p) {
  401. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  402. memcpy(dest, xattr->name, xattr->name_len);
  403. dest[xattr->name_len] = '\0';
  404. dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
  405. xattr->name_len, ci->i_xattrs.names_size);
  406. dest += xattr->name_len + 1;
  407. p = rb_next(p);
  408. }
  409. return dest;
  410. }
  411. void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
  412. {
  413. struct rb_node *p, *tmp;
  414. struct ceph_inode_xattr *xattr = NULL;
  415. p = rb_first(&ci->i_xattrs.index);
  416. dout("__ceph_destroy_xattrs p=%p\n", p);
  417. while (p) {
  418. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  419. tmp = p;
  420. p = rb_next(tmp);
  421. dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
  422. xattr->name_len, xattr->name);
  423. rb_erase(tmp, &ci->i_xattrs.index);
  424. __free_xattr(xattr);
  425. }
  426. ci->i_xattrs.names_size = 0;
  427. ci->i_xattrs.vals_size = 0;
  428. ci->i_xattrs.index_version = 0;
  429. ci->i_xattrs.count = 0;
  430. ci->i_xattrs.index = RB_ROOT;
  431. }
  432. static int __build_xattrs(struct inode *inode)
  433. __releases(ci->i_ceph_lock)
  434. __acquires(ci->i_ceph_lock)
  435. {
  436. u32 namelen;
  437. u32 numattr = 0;
  438. void *p, *end;
  439. u32 len;
  440. const char *name, *val;
  441. struct ceph_inode_info *ci = ceph_inode(inode);
  442. int xattr_version;
  443. struct ceph_inode_xattr **xattrs = NULL;
  444. int err = 0;
  445. int i;
  446. dout("__build_xattrs() len=%d\n",
  447. ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
  448. if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
  449. return 0; /* already built */
  450. __ceph_destroy_xattrs(ci);
  451. start:
  452. /* updated internal xattr rb tree */
  453. if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
  454. p = ci->i_xattrs.blob->vec.iov_base;
  455. end = p + ci->i_xattrs.blob->vec.iov_len;
  456. ceph_decode_32_safe(&p, end, numattr, bad);
  457. xattr_version = ci->i_xattrs.version;
  458. spin_unlock(&ci->i_ceph_lock);
  459. xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
  460. GFP_NOFS);
  461. err = -ENOMEM;
  462. if (!xattrs)
  463. goto bad_lock;
  464. memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
  465. for (i = 0; i < numattr; i++) {
  466. xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
  467. GFP_NOFS);
  468. if (!xattrs[i])
  469. goto bad_lock;
  470. }
  471. spin_lock(&ci->i_ceph_lock);
  472. if (ci->i_xattrs.version != xattr_version) {
  473. /* lost a race, retry */
  474. for (i = 0; i < numattr; i++)
  475. kfree(xattrs[i]);
  476. kfree(xattrs);
  477. xattrs = NULL;
  478. goto start;
  479. }
  480. err = -EIO;
  481. while (numattr--) {
  482. ceph_decode_32_safe(&p, end, len, bad);
  483. namelen = len;
  484. name = p;
  485. p += len;
  486. ceph_decode_32_safe(&p, end, len, bad);
  487. val = p;
  488. p += len;
  489. err = __set_xattr(ci, name, namelen, val, len,
  490. 0, 0, 0, &xattrs[numattr]);
  491. if (err < 0)
  492. goto bad;
  493. }
  494. kfree(xattrs);
  495. }
  496. ci->i_xattrs.index_version = ci->i_xattrs.version;
  497. ci->i_xattrs.dirty = false;
  498. return err;
  499. bad_lock:
  500. spin_lock(&ci->i_ceph_lock);
  501. bad:
  502. if (xattrs) {
  503. for (i = 0; i < numattr; i++)
  504. kfree(xattrs[i]);
  505. kfree(xattrs);
  506. }
  507. ci->i_xattrs.names_size = 0;
  508. return err;
  509. }
  510. static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
  511. int val_size)
  512. {
  513. /*
  514. * 4 bytes for the length, and additional 4 bytes per each xattr name,
  515. * 4 bytes per each value
  516. */
  517. int size = 4 + ci->i_xattrs.count*(4 + 4) +
  518. ci->i_xattrs.names_size +
  519. ci->i_xattrs.vals_size;
  520. dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
  521. ci->i_xattrs.count, ci->i_xattrs.names_size,
  522. ci->i_xattrs.vals_size);
  523. if (name_size)
  524. size += 4 + 4 + name_size + val_size;
  525. return size;
  526. }
  527. /*
  528. * If there are dirty xattrs, reencode xattrs into the prealloc_blob
  529. * and swap into place.
  530. */
  531. void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
  532. {
  533. struct rb_node *p;
  534. struct ceph_inode_xattr *xattr = NULL;
  535. void *dest;
  536. dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
  537. if (ci->i_xattrs.dirty) {
  538. int need = __get_required_blob_size(ci, 0, 0);
  539. BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
  540. p = rb_first(&ci->i_xattrs.index);
  541. dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
  542. ceph_encode_32(&dest, ci->i_xattrs.count);
  543. while (p) {
  544. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  545. ceph_encode_32(&dest, xattr->name_len);
  546. memcpy(dest, xattr->name, xattr->name_len);
  547. dest += xattr->name_len;
  548. ceph_encode_32(&dest, xattr->val_len);
  549. memcpy(dest, xattr->val, xattr->val_len);
  550. dest += xattr->val_len;
  551. p = rb_next(p);
  552. }
  553. /* adjust buffer len; it may be larger than we need */
  554. ci->i_xattrs.prealloc_blob->vec.iov_len =
  555. dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
  556. if (ci->i_xattrs.blob)
  557. ceph_buffer_put(ci->i_xattrs.blob);
  558. ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
  559. ci->i_xattrs.prealloc_blob = NULL;
  560. ci->i_xattrs.dirty = false;
  561. ci->i_xattrs.version++;
  562. }
  563. }
  564. ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
  565. size_t size)
  566. {
  567. struct inode *inode = dentry->d_inode;
  568. struct ceph_inode_info *ci = ceph_inode(inode);
  569. int err;
  570. struct ceph_inode_xattr *xattr;
  571. struct ceph_vxattr *vxattr = NULL;
  572. if (!ceph_is_valid_xattr(name))
  573. return -ENODATA;
  574. spin_lock(&ci->i_ceph_lock);
  575. dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
  576. ci->i_xattrs.version, ci->i_xattrs.index_version);
  577. /* let's see if a virtual xattr was requested */
  578. vxattr = ceph_match_vxattr(inode, name);
  579. if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
  580. err = vxattr->getxattr_cb(ci, value, size);
  581. goto out;
  582. }
  583. if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
  584. (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
  585. goto get_xattr;
  586. } else {
  587. spin_unlock(&ci->i_ceph_lock);
  588. /* get xattrs from mds (if we don't already have them) */
  589. err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
  590. if (err)
  591. return err;
  592. }
  593. spin_lock(&ci->i_ceph_lock);
  594. err = __build_xattrs(inode);
  595. if (err < 0)
  596. goto out;
  597. get_xattr:
  598. err = -ENODATA; /* == ENOATTR */
  599. xattr = __get_xattr(ci, name);
  600. if (!xattr)
  601. goto out;
  602. err = -ERANGE;
  603. if (size && size < xattr->val_len)
  604. goto out;
  605. err = xattr->val_len;
  606. if (size == 0)
  607. goto out;
  608. memcpy(value, xattr->val, xattr->val_len);
  609. out:
  610. spin_unlock(&ci->i_ceph_lock);
  611. return err;
  612. }
  613. ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
  614. {
  615. struct inode *inode = dentry->d_inode;
  616. struct ceph_inode_info *ci = ceph_inode(inode);
  617. struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
  618. u32 vir_namelen = 0;
  619. u32 namelen;
  620. int err;
  621. u32 len;
  622. int i;
  623. spin_lock(&ci->i_ceph_lock);
  624. dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
  625. ci->i_xattrs.version, ci->i_xattrs.index_version);
  626. if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
  627. (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
  628. goto list_xattr;
  629. } else {
  630. spin_unlock(&ci->i_ceph_lock);
  631. err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
  632. if (err)
  633. return err;
  634. }
  635. spin_lock(&ci->i_ceph_lock);
  636. err = __build_xattrs(inode);
  637. if (err < 0)
  638. goto out;
  639. list_xattr:
  640. /*
  641. * Start with virtual dir xattr names (if any) (including
  642. * terminating '\0' characters for each).
  643. */
  644. vir_namelen = ceph_vxattrs_name_size(vxattrs);
  645. /* adding 1 byte per each variable due to the null termination */
  646. namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
  647. err = -ERANGE;
  648. if (size && vir_namelen + namelen > size)
  649. goto out;
  650. err = namelen + vir_namelen;
  651. if (size == 0)
  652. goto out;
  653. names = __copy_xattr_names(ci, names);
  654. /* virtual xattr names, too */
  655. err = namelen;
  656. if (vxattrs) {
  657. for (i = 0; vxattrs[i].name; i++) {
  658. if (!vxattrs[i].hidden &&
  659. !(vxattrs[i].exists_cb &&
  660. !vxattrs[i].exists_cb(ci))) {
  661. len = sprintf(names, "%s", vxattrs[i].name);
  662. names += len + 1;
  663. err += len + 1;
  664. }
  665. }
  666. }
  667. out:
  668. spin_unlock(&ci->i_ceph_lock);
  669. return err;
  670. }
  671. static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
  672. const char *value, size_t size, int flags)
  673. {
  674. struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
  675. struct inode *inode = dentry->d_inode;
  676. struct ceph_inode_info *ci = ceph_inode(inode);
  677. struct inode *parent_inode;
  678. struct ceph_mds_request *req;
  679. struct ceph_mds_client *mdsc = fsc->mdsc;
  680. int err;
  681. int i, nr_pages;
  682. struct page **pages = NULL;
  683. void *kaddr;
  684. /* copy value into some pages */
  685. nr_pages = calc_pages_for(0, size);
  686. if (nr_pages) {
  687. pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
  688. if (!pages)
  689. return -ENOMEM;
  690. err = -ENOMEM;
  691. for (i = 0; i < nr_pages; i++) {
  692. pages[i] = __page_cache_alloc(GFP_NOFS);
  693. if (!pages[i]) {
  694. nr_pages = i;
  695. goto out;
  696. }
  697. kaddr = kmap(pages[i]);
  698. memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
  699. min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
  700. }
  701. }
  702. dout("setxattr value=%.*s\n", (int)size, value);
  703. /* do request */
  704. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
  705. USE_AUTH_MDS);
  706. if (IS_ERR(req)) {
  707. err = PTR_ERR(req);
  708. goto out;
  709. }
  710. req->r_inode = inode;
  711. ihold(inode);
  712. req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
  713. req->r_num_caps = 1;
  714. req->r_args.setxattr.flags = cpu_to_le32(flags);
  715. req->r_path2 = kstrdup(name, GFP_NOFS);
  716. req->r_pages = pages;
  717. req->r_num_pages = nr_pages;
  718. req->r_data_len = size;
  719. dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
  720. parent_inode = ceph_get_dentry_parent_inode(dentry);
  721. err = ceph_mdsc_do_request(mdsc, parent_inode, req);
  722. iput(parent_inode);
  723. ceph_mdsc_put_request(req);
  724. dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
  725. out:
  726. if (pages) {
  727. for (i = 0; i < nr_pages; i++)
  728. __free_page(pages[i]);
  729. kfree(pages);
  730. }
  731. return err;
  732. }
  /*
   * Set an extended attribute.
   *
   * Returns -EROFS when ceph_snap(inode) is not CEPH_NOSNAP, and
   * -EOPNOTSUPP for names outside the supported namespaces or for
   * read-only virtual xattrs.  Every other "ceph." name is passed to
   * ceph_sync_setxattr().
   *
   * For the remaining names: when CEPH_CAP_XATTR_EXCL is issued, the
   * change is applied to the local index with __set_xattr() and the cap is
   * marked dirty; otherwise it is sent with ceph_sync_setxattr().
   *
   * The name, value and index node are duplicated/allocated up front.
   * __set_xattr() takes them over on the local path; the 'out' path frees
   * them on the sync and error paths.
   */
  733. int ceph_setxattr(struct dentry *dentry, const char *name,
  734. const void *value, size_t size, int flags)
  735. {
  736. struct inode *inode = dentry->d_inode;
  737. struct ceph_vxattr *vxattr;
  738. struct ceph_inode_info *ci = ceph_inode(inode);
  739. int issued;
  740. int err;
  741. int dirty;
  742. int name_len = strlen(name);
  743. int val_len = size;
  744. char *newname = NULL;
  745. char *newval = NULL;
  746. struct ceph_inode_xattr *xattr = NULL;
  747. int required_blob_size;
  748. if (ceph_snap(inode) != CEPH_NOSNAP)
  749. return -EROFS;
  750. if (!ceph_is_valid_xattr(name))
  751. return -EOPNOTSUPP;
  752. vxattr = ceph_match_vxattr(inode, name);
  753. if (vxattr && vxattr->readonly)
  754. return -EOPNOTSUPP;
  755. /* pass any unhandled ceph.* xattrs through to the MDS */
  756. if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
  757. goto do_sync_unlocked;
  758. /* preallocate memory for xattr name, value, index node */
  /* err stays -ENOMEM for every allocation failure that jumps to 'out' */
  759. err = -ENOMEM;
  760. newname = kmemdup(name, name_len + 1, GFP_NOFS);
  761. if (!newname)
  762. goto out;
  763. if (val_len) {
  764. newval = kmemdup(value, val_len, GFP_NOFS);
  765. if (!newval)
  766. goto out;
  767. }
  768. xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
  769. if (!xattr)
  770. goto out;
  771. spin_lock(&ci->i_ceph_lock);
  772. retry:
  773. issued = __ceph_caps_issued(ci, NULL);
  774. dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
  775. if (!(issued & CEPH_CAP_XATTR_EXCL))
  776. goto do_sync;
  /* NOTE(review): the return value of __build_xattrs() is ignored here */
  777. __build_xattrs(inode);
  778. required_blob_size = __get_required_blob_size(ci, name_len, val_len);
  779. if (!ci->i_xattrs.prealloc_blob ||
  780. required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
  781. struct ceph_buffer *blob;
  /* allocate the bigger blob with the lock dropped, then re-check */
  782. spin_unlock(&ci->i_ceph_lock);
  783. dout(" preaallocating new blob size=%d\n", required_blob_size);
  784. blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
  785. if (!blob)
  786. goto out;
  787. spin_lock(&ci->i_ceph_lock);
  788. if (ci->i_xattrs.prealloc_blob)
  789. ceph_buffer_put(ci->i_xattrs.prealloc_blob);
  790. ci->i_xattrs.prealloc_blob = blob;
  791. goto retry;
  792. }
  /* newname, newval and xattr are handed over to __set_xattr() here */
  793. err = __set_xattr(ci, newname, name_len, newval,
  794. val_len, 1, 1, 1, &xattr);
  795. dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
  796. ci->i_xattrs.dirty = true;
  797. inode->i_ctime = CURRENT_TIME;
  798. spin_unlock(&ci->i_ceph_lock);
  799. if (dirty)
  800. __mark_inode_dirty(inode, dirty);
  801. return err;
  802. do_sync:
  803. spin_unlock(&ci->i_ceph_lock);
  804. do_sync_unlocked:
  805. err = ceph_sync_setxattr(dentry, name, value, size, flags);
  806. out:
  /* still NULL if we jumped straight to do_sync_unlocked */
  807. kfree(newname);
  808. kfree(newval);
  809. kfree(xattr);
  810. return err;
  811. }
  812. static int ceph_send_removexattr(struct dentry *dentry, const char *name)
  813. {
  814. struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
  815. struct ceph_mds_client *mdsc = fsc->mdsc;
  816. struct inode *inode = dentry->d_inode;
  817. struct inode *parent_inode;
  818. struct ceph_mds_request *req;
  819. int err;
  820. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
  821. USE_AUTH_MDS);
  822. if (IS_ERR(req))
  823. return PTR_ERR(req);
  824. req->r_inode = inode;
  825. ihold(inode);
  826. req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
  827. req->r_num_caps = 1;
  828. req->r_path2 = kstrdup(name, GFP_NOFS);
  829. parent_inode = ceph_get_dentry_parent_inode(dentry);
  830. err = ceph_mdsc_do_request(mdsc, parent_inode, req);
  831. iput(parent_inode);
  832. ceph_mdsc_put_request(req);
  833. return err;
  834. }
  /*
   * Remove an extended attribute.
   *
   * Returns -EROFS when ceph_snap(inode) is not CEPH_NOSNAP, and
   * -EOPNOTSUPP for names outside the supported namespaces or for
   * read-only virtual xattrs.  Every other "ceph." name is passed to
   * ceph_send_removexattr().
   *
   * For the remaining names: when CEPH_CAP_XATTR_EXCL is issued, the entry
   * is dropped from the local index and the cap is marked dirty; otherwise
   * the removal is sent with ceph_send_removexattr().
   */
  835. int ceph_removexattr(struct dentry *dentry, const char *name)
  836. {
  837. struct inode *inode = dentry->d_inode;
  838. struct ceph_vxattr *vxattr;
  839. struct ceph_inode_info *ci = ceph_inode(inode);
  840. int issued;
  841. int err;
  842. int required_blob_size;
  843. int dirty;
  844. if (ceph_snap(inode) != CEPH_NOSNAP)
  845. return -EROFS;
  846. if (!ceph_is_valid_xattr(name))
  847. return -EOPNOTSUPP;
  848. vxattr = ceph_match_vxattr(inode, name);
  849. if (vxattr && vxattr->readonly)
  850. return -EOPNOTSUPP;
  851. /* pass any unhandled ceph.* xattrs through to the MDS */
  852. if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
  853. goto do_sync_unlocked;
  /* the only path that returns this value is a failed ceph_buffer_new() */
  854. err = -ENOMEM;
  855. spin_lock(&ci->i_ceph_lock);
  856. retry:
  857. issued = __ceph_caps_issued(ci, NULL);
  858. dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
  859. if (!(issued & CEPH_CAP_XATTR_EXCL))
  860. goto do_sync;
  /* NOTE(review): the return value of __build_xattrs() is ignored here */
  861. __build_xattrs(inode);
  862. required_blob_size = __get_required_blob_size(ci, 0, 0);
  863. if (!ci->i_xattrs.prealloc_blob ||
  864. required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
  865. struct ceph_buffer *blob;
  /* allocate the bigger blob with the lock dropped, then re-check */
  866. spin_unlock(&ci->i_ceph_lock);
  867. dout(" preaallocating new blob size=%d\n", required_blob_size);
  868. blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
  869. if (!blob)
  870. goto out;
  871. spin_lock(&ci->i_ceph_lock);
  872. if (ci->i_xattrs.prealloc_blob)
  873. ceph_buffer_put(ci->i_xattrs.prealloc_blob);
  874. ci->i_xattrs.prealloc_blob = blob;
  875. goto retry;
  876. }
  877. err = __remove_xattr_by_name(ceph_inode(inode), name);
  /* NOTE(review): caps and ctime are updated even when err reports a failed removal */
  878. dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
  879. ci->i_xattrs.dirty = true;
  880. inode->i_ctime = CURRENT_TIME;
  881. spin_unlock(&ci->i_ceph_lock);
  882. if (dirty)
  883. __mark_inode_dirty(inode, dirty);
  884. return err;
  885. do_sync:
  886. spin_unlock(&ci->i_ceph_lock);
  887. do_sync_unlocked:
  888. err = ceph_send_removexattr(dentry, name);
  889. out:
  890. return err;
  891. }