mpssd.c 41 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701
  1. /*
  2. * Intel MIC Platform Software Stack (MPSS)
  3. *
  4. * Copyright(c) 2013 Intel Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License, version 2, as
  8. * published by the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * The full GNU General Public License is included in this distribution in
  16. * the file called "COPYING".
  17. *
  18. * Intel MIC User Space Tools.
  19. */
  20. #define _GNU_SOURCE
  21. #include <stdlib.h>
  22. #include <fcntl.h>
  23. #include <getopt.h>
  24. #include <assert.h>
  25. #include <unistd.h>
  26. #include <stdbool.h>
  27. #include <signal.h>
  28. #include <poll.h>
  29. #include <features.h>
  30. #include <sys/types.h>
  31. #include <sys/stat.h>
  32. #include <sys/mman.h>
  33. #include <sys/socket.h>
  34. #include <linux/virtio_ring.h>
  35. #include <linux/virtio_net.h>
  36. #include <linux/virtio_console.h>
  37. #include <linux/virtio_blk.h>
  38. #include <linux/version.h>
  39. #include "mpssd.h"
  40. #include <linux/mic_ioctl.h>
  41. #include <linux/mic_common.h>
  42. static void init_mic(struct mic_info *mic);
  43. static FILE *logfp;
  44. static struct mic_info mic_list;
  45. #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
  46. #define min_t(type, x, y) ({ \
  47. type __min1 = (x); \
  48. type __min2 = (y); \
  49. __min1 < __min2 ? __min1 : __min2; })
  50. /* align addr on a size boundary - adjust address up/down if needed */
  51. #define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1)))
  52. #define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size)
  53. /* align addr on a size boundary - adjust address up if needed */
  54. #define _ALIGN(addr, size) _ALIGN_UP(addr, size)
  55. /* to align the pointer to the (next) page boundary */
  56. #define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
  57. #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  58. #define GSO_ENABLED 1
  59. #define MAX_GSO_SIZE (64 * 1024)
  60. #define ETH_H_LEN 14
  61. #define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
  62. #define MIC_DEVICE_PAGE_END 0x1000
  63. #ifndef VIRTIO_NET_HDR_F_DATA_VALID
  64. #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
  65. #endif
  66. static struct {
  67. struct mic_device_desc dd;
  68. struct mic_vqconfig vqconfig[2];
  69. __u32 host_features, guest_acknowledgements;
  70. struct virtio_console_config cons_config;
  71. } virtcons_dev_page = {
  72. .dd = {
  73. .type = VIRTIO_ID_CONSOLE,
  74. .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
  75. .feature_len = sizeof(virtcons_dev_page.host_features),
  76. .config_len = sizeof(virtcons_dev_page.cons_config),
  77. },
  78. .vqconfig[0] = {
  79. .num = htole16(MIC_VRING_ENTRIES),
  80. },
  81. .vqconfig[1] = {
  82. .num = htole16(MIC_VRING_ENTRIES),
  83. },
  84. };
  85. static struct {
  86. struct mic_device_desc dd;
  87. struct mic_vqconfig vqconfig[2];
  88. __u32 host_features, guest_acknowledgements;
  89. struct virtio_net_config net_config;
  90. } virtnet_dev_page = {
  91. .dd = {
  92. .type = VIRTIO_ID_NET,
  93. .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
  94. .feature_len = sizeof(virtnet_dev_page.host_features),
  95. .config_len = sizeof(virtnet_dev_page.net_config),
  96. },
  97. .vqconfig[0] = {
  98. .num = htole16(MIC_VRING_ENTRIES),
  99. },
  100. .vqconfig[1] = {
  101. .num = htole16(MIC_VRING_ENTRIES),
  102. },
  103. #if GSO_ENABLED
  104. .host_features = htole32(
  105. 1 << VIRTIO_NET_F_CSUM |
  106. 1 << VIRTIO_NET_F_GSO |
  107. 1 << VIRTIO_NET_F_GUEST_TSO4 |
  108. 1 << VIRTIO_NET_F_GUEST_TSO6 |
  109. 1 << VIRTIO_NET_F_GUEST_ECN |
  110. 1 << VIRTIO_NET_F_GUEST_UFO),
  111. #else
  112. .host_features = 0,
  113. #endif
  114. };
  115. static const char *mic_config_dir = "/etc/sysconfig/mic";
  116. static const char *virtblk_backend = "VIRTBLK_BACKEND";
  117. static struct {
  118. struct mic_device_desc dd;
  119. struct mic_vqconfig vqconfig[1];
  120. __u32 host_features, guest_acknowledgements;
  121. struct virtio_blk_config blk_config;
  122. } virtblk_dev_page = {
  123. .dd = {
  124. .type = VIRTIO_ID_BLOCK,
  125. .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
  126. .feature_len = sizeof(virtblk_dev_page.host_features),
  127. .config_len = sizeof(virtblk_dev_page.blk_config),
  128. },
  129. .vqconfig[0] = {
  130. .num = htole16(MIC_VRING_ENTRIES),
  131. },
  132. .host_features =
  133. htole32(1<<VIRTIO_BLK_F_SEG_MAX),
  134. .blk_config = {
  135. .seg_max = htole32(MIC_VRING_ENTRIES - 2),
  136. .capacity = htole64(0),
  137. }
  138. };
  139. static char *myname;
  140. static int
  141. tap_configure(struct mic_info *mic, char *dev)
  142. {
  143. pid_t pid;
  144. char *ifargv[7];
  145. char ipaddr[IFNAMSIZ];
  146. int ret = 0;
  147. pid = fork();
  148. if (pid == 0) {
  149. ifargv[0] = "ip";
  150. ifargv[1] = "link";
  151. ifargv[2] = "set";
  152. ifargv[3] = dev;
  153. ifargv[4] = "up";
  154. ifargv[5] = NULL;
  155. mpsslog("Configuring %s\n", dev);
  156. ret = execvp("ip", ifargv);
  157. if (ret < 0) {
  158. mpsslog("%s execvp failed errno %s\n",
  159. mic->name, strerror(errno));
  160. return ret;
  161. }
  162. }
  163. if (pid < 0) {
  164. mpsslog("%s fork failed errno %s\n",
  165. mic->name, strerror(errno));
  166. return ret;
  167. }
  168. ret = waitpid(pid, NULL, 0);
  169. if (ret < 0) {
  170. mpsslog("%s waitpid failed errno %s\n",
  171. mic->name, strerror(errno));
  172. return ret;
  173. }
  174. snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
  175. pid = fork();
  176. if (pid == 0) {
  177. ifargv[0] = "ip";
  178. ifargv[1] = "addr";
  179. ifargv[2] = "add";
  180. ifargv[3] = ipaddr;
  181. ifargv[4] = "dev";
  182. ifargv[5] = dev;
  183. ifargv[6] = NULL;
  184. mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
  185. ret = execvp("ip", ifargv);
  186. if (ret < 0) {
  187. mpsslog("%s execvp failed errno %s\n",
  188. mic->name, strerror(errno));
  189. return ret;
  190. }
  191. }
  192. if (pid < 0) {
  193. mpsslog("%s fork failed errno %s\n",
  194. mic->name, strerror(errno));
  195. return ret;
  196. }
  197. ret = waitpid(pid, NULL, 0);
  198. if (ret < 0) {
  199. mpsslog("%s waitpid failed errno %s\n",
  200. mic->name, strerror(errno));
  201. return ret;
  202. }
  203. mpsslog("MIC name %s %s %d DONE!\n",
  204. mic->name, __func__, __LINE__);
  205. return 0;
  206. }
  207. static int tun_alloc(struct mic_info *mic, char *dev)
  208. {
  209. struct ifreq ifr;
  210. int fd, err;
  211. #if GSO_ENABLED
  212. unsigned offload;
  213. #endif
  214. fd = open("/dev/net/tun", O_RDWR);
  215. if (fd < 0) {
  216. mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
  217. goto done;
  218. }
  219. memset(&ifr, 0, sizeof(ifr));
  220. ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
  221. if (*dev)
  222. strncpy(ifr.ifr_name, dev, IFNAMSIZ);
  223. err = ioctl(fd, TUNSETIFF, (void *) &ifr);
  224. if (err < 0) {
  225. mpsslog("%s %s %d TUNSETIFF failed %s\n",
  226. mic->name, __func__, __LINE__, strerror(errno));
  227. close(fd);
  228. return err;
  229. }
  230. #if GSO_ENABLED
  231. offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
  232. TUN_F_TSO_ECN | TUN_F_UFO;
  233. err = ioctl(fd, TUNSETOFFLOAD, offload);
  234. if (err < 0) {
  235. mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
  236. mic->name, __func__, __LINE__, strerror(errno));
  237. close(fd);
  238. return err;
  239. }
  240. #endif
  241. strcpy(dev, ifr.ifr_name);
  242. mpsslog("Created TAP %s\n", dev);
  243. done:
  244. return fd;
  245. }
  246. #define NET_FD_VIRTIO_NET 0
  247. #define NET_FD_TUN 1
  248. #define MAX_NET_FD 2
  249. static void set_dp(struct mic_info *mic, int type, void *dp)
  250. {
  251. switch (type) {
  252. case VIRTIO_ID_CONSOLE:
  253. mic->mic_console.console_dp = dp;
  254. return;
  255. case VIRTIO_ID_NET:
  256. mic->mic_net.net_dp = dp;
  257. return;
  258. case VIRTIO_ID_BLOCK:
  259. mic->mic_virtblk.block_dp = dp;
  260. return;
  261. }
  262. mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  263. assert(0);
  264. }
  265. static void *get_dp(struct mic_info *mic, int type)
  266. {
  267. switch (type) {
  268. case VIRTIO_ID_CONSOLE:
  269. return mic->mic_console.console_dp;
  270. case VIRTIO_ID_NET:
  271. return mic->mic_net.net_dp;
  272. case VIRTIO_ID_BLOCK:
  273. return mic->mic_virtblk.block_dp;
  274. }
  275. mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  276. assert(0);
  277. return NULL;
  278. }
  279. static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
  280. {
  281. struct mic_device_desc *d;
  282. int i;
  283. void *dp = get_dp(mic, type);
  284. for (i = mic_aligned_size(struct mic_bootparam); i < PAGE_SIZE;
  285. i += mic_total_desc_size(d)) {
  286. d = dp + i;
  287. /* End of list */
  288. if (d->type == 0)
  289. break;
  290. if (d->type == -1)
  291. continue;
  292. mpsslog("%s %s d-> type %d d %p\n",
  293. mic->name, __func__, d->type, d);
  294. if (d->type == (__u8)type)
  295. return d;
  296. }
  297. mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  298. assert(0);
  299. return NULL;
  300. }
  301. /* See comments in vhost.c for explanation of next_desc() */
  302. static unsigned next_desc(struct vring_desc *desc)
  303. {
  304. unsigned int next;
  305. if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
  306. return -1U;
  307. next = le16toh(desc->next);
  308. return next;
  309. }
  310. /* Sum up all the IOVEC length */
  311. static ssize_t
  312. sum_iovec_len(struct mic_copy_desc *copy)
  313. {
  314. ssize_t sum = 0;
  315. int i;
  316. for (i = 0; i < copy->iovcnt; i++)
  317. sum += copy->iov[i].iov_len;
  318. return sum;
  319. }
  320. static inline void verify_out_len(struct mic_info *mic,
  321. struct mic_copy_desc *copy)
  322. {
  323. if (copy->out_len != sum_iovec_len(copy)) {
  324. mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%x\n",
  325. mic->name, __func__, __LINE__,
  326. copy->out_len, sum_iovec_len(copy));
  327. assert(copy->out_len == sum_iovec_len(copy));
  328. }
  329. }
  330. /* Display an iovec */
  331. static void
  332. disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
  333. const char *s, int line)
  334. {
  335. int i;
  336. for (i = 0; i < copy->iovcnt; i++)
  337. mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%lx\n",
  338. mic->name, s, line, i,
  339. copy->iov[i].iov_base, copy->iov[i].iov_len);
  340. }
  341. static inline __u16 read_avail_idx(struct mic_vring *vr)
  342. {
  343. return ACCESS_ONCE(vr->info->avail_idx);
  344. }
  345. static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
  346. struct mic_copy_desc *copy, ssize_t len)
  347. {
  348. copy->vr_idx = tx ? 0 : 1;
  349. copy->update_used = true;
  350. if (type == VIRTIO_ID_NET)
  351. copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
  352. else
  353. copy->iov[0].iov_len = len;
  354. }
  355. /* Central API which triggers the copies */
  356. static int
  357. mic_virtio_copy(struct mic_info *mic, int fd,
  358. struct mic_vring *vr, struct mic_copy_desc *copy)
  359. {
  360. int ret;
  361. ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
  362. if (ret) {
  363. mpsslog("%s %s %d errno %s ret %d\n",
  364. mic->name, __func__, __LINE__,
  365. strerror(errno), ret);
  366. }
  367. return ret;
  368. }
  369. /*
  370. * This initialization routine requires at least one
  371. * vring i.e. vr0. vr1 is optional.
  372. */
  373. static void *
  374. init_vr(struct mic_info *mic, int fd, int type,
  375. struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
  376. {
  377. int vr_size;
  378. char *va;
  379. vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
  380. MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
  381. va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
  382. PROT_READ, MAP_SHARED, fd, 0);
  383. if (MAP_FAILED == va) {
  384. mpsslog("%s %s %d mmap failed errno %s\n",
  385. mic->name, __func__, __LINE__,
  386. strerror(errno));
  387. goto done;
  388. }
  389. set_dp(mic, type, va);
  390. vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
  391. vr0->info = vr0->va +
  392. vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
  393. vring_init(&vr0->vr,
  394. MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
  395. mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
  396. __func__, mic->name, vr0->va, vr0->info, vr_size,
  397. vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
  398. mpsslog("magic 0x%x expected 0x%x\n",
  399. vr0->info->magic, MIC_MAGIC + type);
  400. assert(vr0->info->magic == MIC_MAGIC + type);
  401. if (vr1) {
  402. vr1->va = (struct mic_vring *)
  403. &va[MIC_DEVICE_PAGE_END + vr_size];
  404. vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
  405. MIC_VIRTIO_RING_ALIGN);
  406. vring_init(&vr1->vr,
  407. MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
  408. mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
  409. __func__, mic->name, vr1->va, vr1->info, vr_size,
  410. vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
  411. mpsslog("magic 0x%x expected 0x%x\n",
  412. vr1->info->magic, MIC_MAGIC + type + 1);
  413. assert(vr1->info->magic == MIC_MAGIC + type + 1);
  414. }
  415. done:
  416. return va;
  417. }
  418. static void
  419. wait_for_card_driver(struct mic_info *mic, int fd, int type)
  420. {
  421. struct pollfd pollfd;
  422. int err;
  423. struct mic_device_desc *desc = get_device_desc(mic, type);
  424. pollfd.fd = fd;
  425. mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
  426. mic->name, __func__, type, desc->status);
  427. while (1) {
  428. pollfd.events = POLLIN;
  429. pollfd.revents = 0;
  430. err = poll(&pollfd, 1, -1);
  431. if (err < 0) {
  432. mpsslog("%s %s poll failed %s\n",
  433. mic->name, __func__, strerror(errno));
  434. continue;
  435. }
  436. if (pollfd.revents) {
  437. mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
  438. mic->name, __func__, type, desc->status);
  439. if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
  440. mpsslog("%s %s poll.revents %d\n",
  441. mic->name, __func__, pollfd.revents);
  442. mpsslog("%s %s desc-> type %d status 0x%x\n",
  443. mic->name, __func__, type,
  444. desc->status);
  445. break;
  446. }
  447. }
  448. }
  449. }
  450. /* Spin till we have some descriptors */
  451. static void
  452. spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
  453. {
  454. __u16 avail_idx = read_avail_idx(vr);
  455. while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
  456. #ifdef DEBUG
  457. mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
  458. mic->name, __func__,
  459. le16toh(vr->vr.avail->idx), vr->info->avail_idx);
  460. #endif
  461. sched_yield();
  462. }
  463. }
  464. static void *
  465. virtio_net(void *arg)
  466. {
  467. static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
  468. static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __aligned(64);
  469. struct iovec vnet_iov[2][2] = {
  470. { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
  471. { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
  472. { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
  473. { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
  474. };
  475. struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
  476. struct mic_info *mic = (struct mic_info *)arg;
  477. char if_name[IFNAMSIZ];
  478. struct pollfd net_poll[MAX_NET_FD];
  479. struct mic_vring tx_vr, rx_vr;
  480. struct mic_copy_desc copy;
  481. struct mic_device_desc *desc;
  482. int err;
  483. snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
  484. mic->mic_net.tap_fd = tun_alloc(mic, if_name);
  485. if (mic->mic_net.tap_fd < 0)
  486. goto done;
  487. if (tap_configure(mic, if_name))
  488. goto done;
  489. mpsslog("MIC name %s id %d\n", mic->name, mic->id);
  490. net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
  491. net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
  492. net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
  493. net_poll[NET_FD_TUN].events = POLLIN;
  494. if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
  495. VIRTIO_ID_NET, &tx_vr, &rx_vr,
  496. virtnet_dev_page.dd.num_vq)) {
  497. mpsslog("%s init_vr failed %s\n",
  498. mic->name, strerror(errno));
  499. goto done;
  500. }
  501. copy.iovcnt = 2;
  502. desc = get_device_desc(mic, VIRTIO_ID_NET);
  503. while (1) {
  504. ssize_t len;
  505. net_poll[NET_FD_VIRTIO_NET].revents = 0;
  506. net_poll[NET_FD_TUN].revents = 0;
  507. /* Start polling for data from tap and virtio net */
  508. err = poll(net_poll, 2, -1);
  509. if (err < 0) {
  510. mpsslog("%s poll failed %s\n",
  511. __func__, strerror(errno));
  512. continue;
  513. }
  514. if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
  515. wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
  516. VIRTIO_ID_NET);
  517. /*
  518. * Check if there is data to be read from TUN and write to
  519. * virtio net fd if there is.
  520. */
  521. if (net_poll[NET_FD_TUN].revents & POLLIN) {
  522. copy.iov = iov0;
  523. len = readv(net_poll[NET_FD_TUN].fd,
  524. copy.iov, copy.iovcnt);
  525. if (len > 0) {
  526. struct virtio_net_hdr *hdr
  527. = (struct virtio_net_hdr *) vnet_hdr[0];
  528. /* Disable checksums on the card since we are on
  529. a reliable PCIe link */
  530. hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
  531. #ifdef DEBUG
  532. mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
  533. __func__, __LINE__, hdr->flags);
  534. mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
  535. copy.out_len, hdr->gso_type);
  536. #endif
  537. #ifdef DEBUG
  538. disp_iovec(mic, copy, __func__, __LINE__);
  539. mpsslog("%s %s %d read from tap 0x%lx\n",
  540. mic->name, __func__, __LINE__,
  541. len);
  542. #endif
  543. spin_for_descriptors(mic, &tx_vr);
  544. txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
  545. len);
  546. err = mic_virtio_copy(mic,
  547. mic->mic_net.virtio_net_fd, &tx_vr,
  548. &copy);
  549. if (err < 0) {
  550. mpsslog("%s %s %d mic_virtio_copy %s\n",
  551. mic->name, __func__, __LINE__,
  552. strerror(errno));
  553. }
  554. if (!err)
  555. verify_out_len(mic, &copy);
  556. #ifdef DEBUG
  557. disp_iovec(mic, copy, __func__, __LINE__);
  558. mpsslog("%s %s %d wrote to net 0x%lx\n",
  559. mic->name, __func__, __LINE__,
  560. sum_iovec_len(&copy));
  561. #endif
  562. /* Reinitialize IOV for next run */
  563. iov0[1].iov_len = MAX_NET_PKT_SIZE;
  564. } else if (len < 0) {
  565. disp_iovec(mic, &copy, __func__, __LINE__);
  566. mpsslog("%s %s %d read failed %s ", mic->name,
  567. __func__, __LINE__, strerror(errno));
  568. mpsslog("cnt %d sum %d\n",
  569. copy.iovcnt, sum_iovec_len(&copy));
  570. }
  571. }
  572. /*
  573. * Check if there is data to be read from virtio net and
  574. * write to TUN if there is.
  575. */
  576. if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
  577. while (rx_vr.info->avail_idx !=
  578. le16toh(rx_vr.vr.avail->idx)) {
  579. copy.iov = iov1;
  580. txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
  581. MAX_NET_PKT_SIZE
  582. + sizeof(struct virtio_net_hdr));
  583. err = mic_virtio_copy(mic,
  584. mic->mic_net.virtio_net_fd, &rx_vr,
  585. &copy);
  586. if (!err) {
  587. #ifdef DEBUG
  588. struct virtio_net_hdr *hdr
  589. = (struct virtio_net_hdr *)
  590. vnet_hdr[1];
  591. mpsslog("%s %s %d hdr->flags 0x%x, ",
  592. mic->name, __func__, __LINE__,
  593. hdr->flags);
  594. mpsslog("out_len %d gso_type 0x%x\n",
  595. copy.out_len,
  596. hdr->gso_type);
  597. #endif
  598. /* Set the correct output iov_len */
  599. iov1[1].iov_len = copy.out_len -
  600. sizeof(struct virtio_net_hdr);
  601. verify_out_len(mic, &copy);
  602. #ifdef DEBUG
  603. disp_iovec(mic, copy, __func__,
  604. __LINE__);
  605. mpsslog("%s %s %d ",
  606. mic->name, __func__, __LINE__);
  607. mpsslog("read from net 0x%lx\n",
  608. sum_iovec_len(copy));
  609. #endif
  610. len = writev(net_poll[NET_FD_TUN].fd,
  611. copy.iov, copy.iovcnt);
  612. if (len != sum_iovec_len(&copy)) {
  613. mpsslog("Tun write failed %s ",
  614. strerror(errno));
  615. mpsslog("len 0x%x ", len);
  616. mpsslog("read_len 0x%x\n",
  617. sum_iovec_len(&copy));
  618. } else {
  619. #ifdef DEBUG
  620. disp_iovec(mic, &copy, __func__,
  621. __LINE__);
  622. mpsslog("%s %s %d ",
  623. mic->name, __func__,
  624. __LINE__);
  625. mpsslog("wrote to tap 0x%lx\n",
  626. len);
  627. #endif
  628. }
  629. } else {
  630. mpsslog("%s %s %d mic_virtio_copy %s\n",
  631. mic->name, __func__, __LINE__,
  632. strerror(errno));
  633. break;
  634. }
  635. }
  636. }
  637. if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
  638. mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
  639. }
  640. done:
  641. pthread_exit(NULL);
  642. }
  643. /* virtio_console */
  644. #define VIRTIO_CONSOLE_FD 0
  645. #define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
  646. #define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
  647. #define MAX_BUFFER_SIZE PAGE_SIZE
  648. static void *
  649. virtio_console(void *arg)
  650. {
  651. static __u8 vcons_buf[2][PAGE_SIZE];
  652. struct iovec vcons_iov[2] = {
  653. { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
  654. { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
  655. };
  656. struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
  657. struct mic_info *mic = (struct mic_info *)arg;
  658. int err;
  659. struct pollfd console_poll[MAX_CONSOLE_FD];
  660. int pty_fd;
  661. char *pts_name;
  662. ssize_t len;
  663. struct mic_vring tx_vr, rx_vr;
  664. struct mic_copy_desc copy;
  665. struct mic_device_desc *desc;
  666. pty_fd = posix_openpt(O_RDWR);
  667. if (pty_fd < 0) {
  668. mpsslog("can't open a pseudoterminal master device: %s\n",
  669. strerror(errno));
  670. goto _return;
  671. }
  672. pts_name = ptsname(pty_fd);
  673. if (pts_name == NULL) {
  674. mpsslog("can't get pts name\n");
  675. goto _close_pty;
  676. }
  677. printf("%s console message goes to %s\n", mic->name, pts_name);
  678. mpsslog("%s console message goes to %s\n", mic->name, pts_name);
  679. err = grantpt(pty_fd);
  680. if (err < 0) {
  681. mpsslog("can't grant access: %s %s\n",
  682. pts_name, strerror(errno));
  683. goto _close_pty;
  684. }
  685. err = unlockpt(pty_fd);
  686. if (err < 0) {
  687. mpsslog("can't unlock a pseudoterminal: %s %s\n",
  688. pts_name, strerror(errno));
  689. goto _close_pty;
  690. }
  691. console_poll[MONITOR_FD].fd = pty_fd;
  692. console_poll[MONITOR_FD].events = POLLIN;
  693. console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
  694. console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
  695. if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
  696. VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
  697. virtcons_dev_page.dd.num_vq)) {
  698. mpsslog("%s init_vr failed %s\n",
  699. mic->name, strerror(errno));
  700. goto _close_pty;
  701. }
  702. copy.iovcnt = 1;
  703. desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
  704. for (;;) {
  705. console_poll[MONITOR_FD].revents = 0;
  706. console_poll[VIRTIO_CONSOLE_FD].revents = 0;
  707. err = poll(console_poll, MAX_CONSOLE_FD, -1);
  708. if (err < 0) {
  709. mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
  710. strerror(errno));
  711. continue;
  712. }
  713. if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
  714. wait_for_card_driver(mic,
  715. mic->mic_console.virtio_console_fd,
  716. VIRTIO_ID_CONSOLE);
  717. if (console_poll[MONITOR_FD].revents & POLLIN) {
  718. copy.iov = iov0;
  719. len = readv(pty_fd, copy.iov, copy.iovcnt);
  720. if (len > 0) {
  721. #ifdef DEBUG
  722. disp_iovec(mic, copy, __func__, __LINE__);
  723. mpsslog("%s %s %d read from tap 0x%lx\n",
  724. mic->name, __func__, __LINE__,
  725. len);
  726. #endif
  727. spin_for_descriptors(mic, &tx_vr);
  728. txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
  729. &copy, len);
  730. err = mic_virtio_copy(mic,
  731. mic->mic_console.virtio_console_fd,
  732. &tx_vr, &copy);
  733. if (err < 0) {
  734. mpsslog("%s %s %d mic_virtio_copy %s\n",
  735. mic->name, __func__, __LINE__,
  736. strerror(errno));
  737. }
  738. if (!err)
  739. verify_out_len(mic, &copy);
  740. #ifdef DEBUG
  741. disp_iovec(mic, copy, __func__, __LINE__);
  742. mpsslog("%s %s %d wrote to net 0x%lx\n",
  743. mic->name, __func__, __LINE__,
  744. sum_iovec_len(copy));
  745. #endif
  746. /* Reinitialize IOV for next run */
  747. iov0->iov_len = PAGE_SIZE;
  748. } else if (len < 0) {
  749. disp_iovec(mic, &copy, __func__, __LINE__);
  750. mpsslog("%s %s %d read failed %s ",
  751. mic->name, __func__, __LINE__,
  752. strerror(errno));
  753. mpsslog("cnt %d sum %d\n",
  754. copy.iovcnt, sum_iovec_len(&copy));
  755. }
  756. }
  757. if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
  758. while (rx_vr.info->avail_idx !=
  759. le16toh(rx_vr.vr.avail->idx)) {
  760. copy.iov = iov1;
  761. txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
  762. &copy, PAGE_SIZE);
  763. err = mic_virtio_copy(mic,
  764. mic->mic_console.virtio_console_fd,
  765. &rx_vr, &copy);
  766. if (!err) {
  767. /* Set the correct output iov_len */
  768. iov1->iov_len = copy.out_len;
  769. verify_out_len(mic, &copy);
  770. #ifdef DEBUG
  771. disp_iovec(mic, copy, __func__,
  772. __LINE__);
  773. mpsslog("%s %s %d ",
  774. mic->name, __func__, __LINE__);
  775. mpsslog("read from net 0x%lx\n",
  776. sum_iovec_len(copy));
  777. #endif
  778. len = writev(pty_fd,
  779. copy.iov, copy.iovcnt);
  780. if (len != sum_iovec_len(&copy)) {
  781. mpsslog("Tun write failed %s ",
  782. strerror(errno));
  783. mpsslog("len 0x%x ", len);
  784. mpsslog("read_len 0x%x\n",
  785. sum_iovec_len(&copy));
  786. } else {
  787. #ifdef DEBUG
  788. disp_iovec(mic, copy, __func__,
  789. __LINE__);
  790. mpsslog("%s %s %d ",
  791. mic->name, __func__,
  792. __LINE__);
  793. mpsslog("wrote to tap 0x%lx\n",
  794. len);
  795. #endif
  796. }
  797. } else {
  798. mpsslog("%s %s %d mic_virtio_copy %s\n",
  799. mic->name, __func__, __LINE__,
  800. strerror(errno));
  801. break;
  802. }
  803. }
  804. }
  805. if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
  806. mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
  807. }
  808. _close_pty:
  809. close(pty_fd);
  810. _return:
  811. pthread_exit(NULL);
  812. }
  813. static void
  814. add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
  815. {
  816. char path[PATH_MAX];
  817. int fd, err;
  818. snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
  819. fd = open(path, O_RDWR);
  820. if (fd < 0) {
  821. mpsslog("Could not open %s %s\n", path, strerror(errno));
  822. return;
  823. }
  824. err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
  825. if (err < 0) {
  826. mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
  827. close(fd);
  828. return;
  829. }
  830. switch (dd->type) {
  831. case VIRTIO_ID_NET:
  832. mic->mic_net.virtio_net_fd = fd;
  833. mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
  834. break;
  835. case VIRTIO_ID_CONSOLE:
  836. mic->mic_console.virtio_console_fd = fd;
  837. mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
  838. break;
  839. case VIRTIO_ID_BLOCK:
  840. mic->mic_virtblk.virtio_block_fd = fd;
  841. mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
  842. break;
  843. }
  844. }
  845. static bool
  846. set_backend_file(struct mic_info *mic)
  847. {
  848. FILE *config;
  849. char buff[PATH_MAX], *line, *evv, *p;
  850. snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
  851. config = fopen(buff, "r");
  852. if (config == NULL)
  853. return false;
  854. do { /* look for "virtblk_backend=XXXX" */
  855. line = fgets(buff, PATH_MAX, config);
  856. if (line == NULL)
  857. break;
  858. if (*line == '#')
  859. continue;
  860. p = strchr(line, '\n');
  861. if (p)
  862. *p = '\0';
  863. } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
  864. fclose(config);
  865. if (line == NULL)
  866. return false;
  867. evv = strchr(line, '=');
  868. if (evv == NULL)
  869. return false;
  870. mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
  871. if (mic->mic_virtblk.backend_file == NULL) {
  872. mpsslog("can't allocate memory\n", mic->name, mic->id);
  873. return false;
  874. }
  875. strcpy(mic->mic_virtblk.backend_file, evv + 1);
  876. return true;
  877. }
  878. #define SECTOR_SIZE 512
  879. static bool
  880. set_backend_size(struct mic_info *mic)
  881. {
  882. mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
  883. SEEK_END);
  884. if (mic->mic_virtblk.backend_size < 0) {
  885. mpsslog("%s: can't seek: %s\n",
  886. mic->name, mic->mic_virtblk.backend_file);
  887. return false;
  888. }
  889. virtblk_dev_page.blk_config.capacity =
  890. mic->mic_virtblk.backend_size / SECTOR_SIZE;
  891. if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
  892. virtblk_dev_page.blk_config.capacity++;
  893. virtblk_dev_page.blk_config.capacity =
  894. htole64(virtblk_dev_page.blk_config.capacity);
  895. return true;
  896. }
  897. static bool
  898. open_backend(struct mic_info *mic)
  899. {
  900. if (!set_backend_file(mic))
  901. goto _error_exit;
  902. mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
  903. if (mic->mic_virtblk.backend < 0) {
  904. mpsslog("%s: can't open: %s\n", mic->name,
  905. mic->mic_virtblk.backend_file);
  906. goto _error_free;
  907. }
  908. if (!set_backend_size(mic))
  909. goto _error_close;
  910. mic->mic_virtblk.backend_addr = mmap(NULL,
  911. mic->mic_virtblk.backend_size,
  912. PROT_READ|PROT_WRITE, MAP_SHARED,
  913. mic->mic_virtblk.backend, 0L);
  914. if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
  915. mpsslog("%s: can't map: %s %s\n",
  916. mic->name, mic->mic_virtblk.backend_file,
  917. strerror(errno));
  918. goto _error_close;
  919. }
  920. return true;
  921. _error_close:
  922. close(mic->mic_virtblk.backend);
  923. _error_free:
  924. free(mic->mic_virtblk.backend_file);
  925. _error_exit:
  926. return false;
  927. }
  928. static void
  929. close_backend(struct mic_info *mic)
  930. {
  931. munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
  932. close(mic->mic_virtblk.backend);
  933. free(mic->mic_virtblk.backend_file);
  934. }
  935. static bool
  936. start_virtblk(struct mic_info *mic, struct mic_vring *vring)
  937. {
  938. if (((__u64)&virtblk_dev_page.blk_config % 8) != 0) {
  939. mpsslog("%s: blk_config is not 8 byte aligned.\n",
  940. mic->name);
  941. return false;
  942. }
  943. add_virtio_device(mic, &virtblk_dev_page.dd);
  944. if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
  945. VIRTIO_ID_BLOCK, vring, NULL, virtblk_dev_page.dd.num_vq)) {
  946. mpsslog("%s init_vr failed %s\n",
  947. mic->name, strerror(errno));
  948. return false;
  949. }
  950. return true;
  951. }
  952. static void
  953. stop_virtblk(struct mic_info *mic)
  954. {
  955. int vr_size, ret;
  956. vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
  957. MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
  958. ret = munmap(mic->mic_virtblk.block_dp,
  959. MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
  960. if (ret < 0)
  961. mpsslog("%s munmap errno %d\n", mic->name, errno);
  962. close(mic->mic_virtblk.virtio_block_fd);
  963. }
  964. static __u8
  965. header_error_check(struct vring_desc *desc)
  966. {
  967. if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
  968. mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
  969. __func__, __LINE__);
  970. return -EIO;
  971. }
  972. if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
  973. mpsslog("%s() %d: alone\n",
  974. __func__, __LINE__);
  975. return -EIO;
  976. }
  977. if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
  978. mpsslog("%s() %d: not read\n",
  979. __func__, __LINE__);
  980. return -EIO;
  981. }
  982. return 0;
  983. }
  984. static int
  985. read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
  986. {
  987. struct iovec iovec;
  988. struct mic_copy_desc copy;
  989. iovec.iov_len = sizeof(*hdr);
  990. iovec.iov_base = hdr;
  991. copy.iov = &iovec;
  992. copy.iovcnt = 1;
  993. copy.vr_idx = 0; /* only one vring on virtio_block */
  994. copy.update_used = false; /* do not update used index */
  995. return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  996. }
  997. static int
  998. transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
  999. {
  1000. struct mic_copy_desc copy;
  1001. copy.iov = iovec;
  1002. copy.iovcnt = iovcnt;
  1003. copy.vr_idx = 0; /* only one vring on virtio_block */
  1004. copy.update_used = false; /* do not update used index */
  1005. return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  1006. }
  1007. static __u8
  1008. status_error_check(struct vring_desc *desc)
  1009. {
  1010. if (le32toh(desc->len) != sizeof(__u8)) {
  1011. mpsslog("%s() %d: length is not sizeof(status)\n",
  1012. __func__, __LINE__);
  1013. return -EIO;
  1014. }
  1015. return 0;
  1016. }
  1017. static int
  1018. write_status(int fd, __u8 *status)
  1019. {
  1020. struct iovec iovec;
  1021. struct mic_copy_desc copy;
  1022. iovec.iov_base = status;
  1023. iovec.iov_len = sizeof(*status);
  1024. copy.iov = &iovec;
  1025. copy.iovcnt = 1;
  1026. copy.vr_idx = 0; /* only one vring on virtio_block */
  1027. copy.update_used = true; /* Update used index */
  1028. return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  1029. }
  1030. static void *
  1031. virtio_block(void *arg)
  1032. {
  1033. struct mic_info *mic = (struct mic_info *) arg;
  1034. int ret;
  1035. struct pollfd block_poll;
  1036. struct mic_vring vring;
  1037. __u16 avail_idx;
  1038. __u32 desc_idx;
  1039. struct vring_desc *desc;
  1040. struct iovec *iovec, *piov;
  1041. __u8 status;
  1042. __u32 buffer_desc_idx;
  1043. struct virtio_blk_outhdr hdr;
  1044. void *fos;
  1045. for (;;) { /* forever */
  1046. if (!open_backend(mic)) { /* No virtblk */
  1047. for (mic->mic_virtblk.signaled = 0;
  1048. !mic->mic_virtblk.signaled;)
  1049. sleep(1);
  1050. continue;
  1051. }
  1052. /* backend file is specified. */
  1053. if (!start_virtblk(mic, &vring))
  1054. goto _close_backend;
  1055. iovec = malloc(sizeof(*iovec) *
  1056. le32toh(virtblk_dev_page.blk_config.seg_max));
  1057. if (!iovec) {
  1058. mpsslog("%s: can't alloc iovec: %s\n",
  1059. mic->name, strerror(ENOMEM));
  1060. goto _stop_virtblk;
  1061. }
  1062. block_poll.fd = mic->mic_virtblk.virtio_block_fd;
  1063. block_poll.events = POLLIN;
  1064. for (mic->mic_virtblk.signaled = 0;
  1065. !mic->mic_virtblk.signaled;) {
  1066. block_poll.revents = 0;
  1067. /* timeout in 1 sec to see signaled */
  1068. ret = poll(&block_poll, 1, 1000);
  1069. if (ret < 0) {
  1070. mpsslog("%s %d: poll failed: %s\n",
  1071. __func__, __LINE__,
  1072. strerror(errno));
  1073. continue;
  1074. }
  1075. if (!(block_poll.revents & POLLIN)) {
  1076. #ifdef DEBUG
  1077. mpsslog("%s %d: block_poll.revents=0x%x\n",
  1078. __func__, __LINE__, block_poll.revents);
  1079. #endif
  1080. continue;
  1081. }
  1082. /* POLLIN */
  1083. while (vring.info->avail_idx !=
  1084. le16toh(vring.vr.avail->idx)) {
  1085. /* read header element */
  1086. avail_idx =
  1087. vring.info->avail_idx &
  1088. (vring.vr.num - 1);
  1089. desc_idx = le16toh(
  1090. vring.vr.avail->ring[avail_idx]);
  1091. desc = &vring.vr.desc[desc_idx];
  1092. #ifdef DEBUG
  1093. mpsslog("%s() %d: avail_idx=%d ",
  1094. __func__, __LINE__,
  1095. vring.info->avail_idx);
  1096. mpsslog("vring.vr.num=%d desc=%p\n",
  1097. vring.vr.num, desc);
  1098. #endif
  1099. status = header_error_check(desc);
  1100. ret = read_header(
  1101. mic->mic_virtblk.virtio_block_fd,
  1102. &hdr, desc_idx);
  1103. if (ret < 0) {
  1104. mpsslog("%s() %d %s: ret=%d %s\n",
  1105. __func__, __LINE__,
  1106. mic->name, ret,
  1107. strerror(errno));
  1108. break;
  1109. }
  1110. /* buffer element */
  1111. piov = iovec;
  1112. status = 0;
  1113. fos = mic->mic_virtblk.backend_addr +
  1114. (hdr.sector * SECTOR_SIZE);
  1115. buffer_desc_idx = desc_idx =
  1116. next_desc(desc);
  1117. for (desc = &vring.vr.desc[buffer_desc_idx];
  1118. desc->flags & VRING_DESC_F_NEXT;
  1119. desc_idx = next_desc(desc),
  1120. desc = &vring.vr.desc[desc_idx]) {
  1121. piov->iov_len = desc->len;
  1122. piov->iov_base = fos;
  1123. piov++;
  1124. fos += desc->len;
  1125. }
  1126. /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
  1127. if (hdr.type & ~(VIRTIO_BLK_T_OUT |
  1128. VIRTIO_BLK_T_GET_ID)) {
  1129. /*
  1130. VIRTIO_BLK_T_IN - does not do
  1131. anything. Probably for documenting.
  1132. VIRTIO_BLK_T_SCSI_CMD - for
  1133. virtio_scsi.
  1134. VIRTIO_BLK_T_FLUSH - turned off in
  1135. config space.
  1136. VIRTIO_BLK_T_BARRIER - defined but not
  1137. used in anywhere.
  1138. */
  1139. mpsslog("%s() %d: type %x ",
  1140. __func__, __LINE__,
  1141. hdr.type);
  1142. mpsslog("is not supported\n");
  1143. status = -ENOTSUP;
  1144. } else {
  1145. ret = transfer_blocks(
  1146. mic->mic_virtblk.virtio_block_fd,
  1147. iovec,
  1148. piov - iovec);
  1149. if (ret < 0 &&
  1150. status != 0)
  1151. status = ret;
  1152. }
  1153. /* write status and update used pointer */
  1154. if (status != 0)
  1155. status = status_error_check(desc);
  1156. ret = write_status(
  1157. mic->mic_virtblk.virtio_block_fd,
  1158. &status);
  1159. #ifdef DEBUG
  1160. mpsslog("%s() %d: write status=%d on desc=%p\n",
  1161. __func__, __LINE__,
  1162. status, desc);
  1163. #endif
  1164. }
  1165. }
  1166. free(iovec);
  1167. _stop_virtblk:
  1168. stop_virtblk(mic);
  1169. _close_backend:
  1170. close_backend(mic);
  1171. } /* forever */
  1172. pthread_exit(NULL);
  1173. }
  1174. static void
  1175. reset(struct mic_info *mic)
  1176. {
  1177. #define RESET_TIMEOUT 120
  1178. int i = RESET_TIMEOUT;
  1179. setsysfs(mic->name, "state", "reset");
  1180. while (i) {
  1181. char *state;
  1182. state = readsysfs(mic->name, "state");
  1183. if (!state)
  1184. goto retry;
  1185. mpsslog("%s: %s %d state %s\n",
  1186. mic->name, __func__, __LINE__, state);
  1187. if ((!strcmp(state, "offline"))) {
  1188. free(state);
  1189. break;
  1190. }
  1191. free(state);
  1192. retry:
  1193. sleep(1);
  1194. i--;
  1195. }
  1196. }
  1197. static int
  1198. get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
  1199. {
  1200. if (!strcmp(shutdown_status, "nop"))
  1201. return MIC_NOP;
  1202. if (!strcmp(shutdown_status, "crashed"))
  1203. return MIC_CRASHED;
  1204. if (!strcmp(shutdown_status, "halted"))
  1205. return MIC_HALTED;
  1206. if (!strcmp(shutdown_status, "poweroff"))
  1207. return MIC_POWER_OFF;
  1208. if (!strcmp(shutdown_status, "restart"))
  1209. return MIC_RESTART;
  1210. mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
  1211. /* Invalid state */
  1212. assert(0);
  1213. };
  1214. static int get_mic_state(struct mic_info *mic, char *state)
  1215. {
  1216. if (!strcmp(state, "offline"))
  1217. return MIC_OFFLINE;
  1218. if (!strcmp(state, "online"))
  1219. return MIC_ONLINE;
  1220. if (!strcmp(state, "shutting_down"))
  1221. return MIC_SHUTTING_DOWN;
  1222. if (!strcmp(state, "reset_failed"))
  1223. return MIC_RESET_FAILED;
  1224. mpsslog("%s: BUG invalid state %s\n", mic->name, state);
  1225. /* Invalid state */
  1226. assert(0);
  1227. };
  1228. static void mic_handle_shutdown(struct mic_info *mic)
  1229. {
  1230. #define SHUTDOWN_TIMEOUT 60
  1231. int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
  1232. char *shutdown_status;
  1233. while (i) {
  1234. shutdown_status = readsysfs(mic->name, "shutdown_status");
  1235. if (!shutdown_status)
  1236. continue;
  1237. mpsslog("%s: %s %d shutdown_status %s\n",
  1238. mic->name, __func__, __LINE__, shutdown_status);
  1239. switch (get_mic_shutdown_status(mic, shutdown_status)) {
  1240. case MIC_RESTART:
  1241. mic->restart = 1;
  1242. case MIC_HALTED:
  1243. case MIC_POWER_OFF:
  1244. case MIC_CRASHED:
  1245. free(shutdown_status);
  1246. goto reset;
  1247. default:
  1248. break;
  1249. }
  1250. free(shutdown_status);
  1251. sleep(1);
  1252. i--;
  1253. }
  1254. reset:
  1255. ret = kill(mic->pid, SIGTERM);
  1256. mpsslog("%s: %s %d kill pid %d ret %d\n",
  1257. mic->name, __func__, __LINE__,
  1258. mic->pid, ret);
  1259. if (!ret) {
  1260. ret = waitpid(mic->pid, &stat,
  1261. WIFSIGNALED(stat));
  1262. mpsslog("%s: %s %d waitpid ret %d pid %d\n",
  1263. mic->name, __func__, __LINE__,
  1264. ret, mic->pid);
  1265. }
  1266. if (ret == mic->pid)
  1267. reset(mic);
  1268. }
  1269. static void *
  1270. mic_config(void *arg)
  1271. {
  1272. struct mic_info *mic = (struct mic_info *)arg;
  1273. char *state = NULL;
  1274. char pathname[PATH_MAX];
  1275. int fd, ret;
  1276. struct pollfd ufds[1];
  1277. char value[4096];
  1278. snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
  1279. MICSYSFSDIR, mic->name, "state");
  1280. fd = open(pathname, O_RDONLY);
  1281. if (fd < 0) {
  1282. mpsslog("%s: opening file %s failed %s\n",
  1283. mic->name, pathname, strerror(errno));
  1284. goto error;
  1285. }
  1286. do {
  1287. ret = read(fd, value, sizeof(value));
  1288. if (ret < 0) {
  1289. mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
  1290. mic->name, pathname, strerror(errno));
  1291. goto close_error1;
  1292. }
  1293. retry:
  1294. state = readsysfs(mic->name, "state");
  1295. if (!state)
  1296. goto retry;
  1297. mpsslog("%s: %s %d state %s\n",
  1298. mic->name, __func__, __LINE__, state);
  1299. switch (get_mic_state(mic, state)) {
  1300. case MIC_SHUTTING_DOWN:
  1301. mic_handle_shutdown(mic);
  1302. goto close_error;
  1303. default:
  1304. break;
  1305. }
  1306. free(state);
  1307. ufds[0].fd = fd;
  1308. ufds[0].events = POLLERR | POLLPRI;
  1309. ret = poll(ufds, 1, -1);
  1310. if (ret < 0) {
  1311. mpsslog("%s: poll failed %s\n",
  1312. mic->name, strerror(errno));
  1313. goto close_error1;
  1314. }
  1315. } while (1);
  1316. close_error:
  1317. free(state);
  1318. close_error1:
  1319. close(fd);
  1320. error:
  1321. init_mic(mic);
  1322. pthread_exit(NULL);
  1323. }
  1324. static void
  1325. set_cmdline(struct mic_info *mic)
  1326. {
  1327. char buffer[PATH_MAX];
  1328. int len;
  1329. len = snprintf(buffer, PATH_MAX,
  1330. "clocksource=tsc highres=off nohz=off ");
  1331. len += snprintf(buffer + len, PATH_MAX,
  1332. "cpufreq_on;corec6_off;pc3_off;pc6_off ");
  1333. len += snprintf(buffer + len, PATH_MAX,
  1334. "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
  1335. mic->id);
  1336. setsysfs(mic->name, "cmdline", buffer);
  1337. mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
  1338. snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
  1339. mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
  1340. }
  1341. static void
  1342. set_log_buf_info(struct mic_info *mic)
  1343. {
  1344. int fd;
  1345. off_t len;
  1346. char system_map[] = "/lib/firmware/mic/System.map";
  1347. char *map, *temp, log_buf[17] = {'\0'};
  1348. fd = open(system_map, O_RDONLY);
  1349. if (fd < 0) {
  1350. mpsslog("%s: Opening System.map failed: %d\n",
  1351. mic->name, errno);
  1352. return;
  1353. }
  1354. len = lseek(fd, 0, SEEK_END);
  1355. if (len < 0) {
  1356. mpsslog("%s: Reading System.map size failed: %d\n",
  1357. mic->name, errno);
  1358. close(fd);
  1359. return;
  1360. }
  1361. map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
  1362. if (map == MAP_FAILED) {
  1363. mpsslog("%s: mmap of System.map failed: %d\n",
  1364. mic->name, errno);
  1365. close(fd);
  1366. return;
  1367. }
  1368. temp = strstr(map, "__log_buf");
  1369. if (!temp) {
  1370. mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
  1371. munmap(map, len);
  1372. close(fd);
  1373. return;
  1374. }
  1375. strncpy(log_buf, temp - 19, 16);
  1376. setsysfs(mic->name, "log_buf_addr", log_buf);
  1377. mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
  1378. temp = strstr(map, "log_buf_len");
  1379. if (!temp) {
  1380. mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
  1381. munmap(map, len);
  1382. close(fd);
  1383. return;
  1384. }
  1385. strncpy(log_buf, temp - 19, 16);
  1386. setsysfs(mic->name, "log_buf_len", log_buf);
  1387. mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
  1388. munmap(map, len);
  1389. close(fd);
  1390. }
  1391. static void init_mic(struct mic_info *mic);
  1392. static void
  1393. change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
  1394. {
  1395. struct mic_info *mic;
  1396. for (mic = mic_list.next; mic != NULL; mic = mic->next)
  1397. mic->mic_virtblk.signaled = 1/* true */;
  1398. }
  1399. static void
  1400. init_mic(struct mic_info *mic)
  1401. {
  1402. struct sigaction ignore = {
  1403. .sa_flags = 0,
  1404. .sa_handler = SIG_IGN
  1405. };
  1406. struct sigaction act = {
  1407. .sa_flags = SA_SIGINFO,
  1408. .sa_sigaction = change_virtblk_backend,
  1409. };
  1410. char buffer[PATH_MAX];
  1411. int err;
  1412. /*
  1413. * Currently, one virtio block device is supported for each MIC card
  1414. * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
  1415. * The signal informs the virtio block backend about a change in the
  1416. * configuration file which specifies the virtio backend file name on
  1417. * the host. Virtio block backend then re-reads the configuration file
  1418. * and switches to the new block device. This signalling mechanism may
  1419. * not be required once multiple virtio block devices are supported by
  1420. * the MIC daemon.
  1421. */
  1422. sigaction(SIGUSR1, &ignore, NULL);
  1423. mic->pid = fork();
  1424. switch (mic->pid) {
  1425. case 0:
  1426. set_log_buf_info(mic);
  1427. set_cmdline(mic);
  1428. add_virtio_device(mic, &virtcons_dev_page.dd);
  1429. add_virtio_device(mic, &virtnet_dev_page.dd);
  1430. err = pthread_create(&mic->mic_console.console_thread, NULL,
  1431. virtio_console, mic);
  1432. if (err)
  1433. mpsslog("%s virtcons pthread_create failed %s\n",
  1434. mic->name, strerror(err));
  1435. err = pthread_create(&mic->mic_net.net_thread, NULL,
  1436. virtio_net, mic);
  1437. if (err)
  1438. mpsslog("%s virtnet pthread_create failed %s\n",
  1439. mic->name, strerror(err));
  1440. err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
  1441. virtio_block, mic);
  1442. if (err)
  1443. mpsslog("%s virtblk pthread_create failed %s\n",
  1444. mic->name, strerror(err));
  1445. sigemptyset(&act.sa_mask);
  1446. err = sigaction(SIGUSR1, &act, NULL);
  1447. if (err)
  1448. mpsslog("%s sigaction SIGUSR1 failed %s\n",
  1449. mic->name, strerror(errno));
  1450. while (1)
  1451. sleep(60);
  1452. case -1:
  1453. mpsslog("fork failed MIC name %s id %d errno %d\n",
  1454. mic->name, mic->id, errno);
  1455. break;
  1456. default:
  1457. if (mic->restart) {
  1458. snprintf(buffer, PATH_MAX, "boot");
  1459. setsysfs(mic->name, "state", buffer);
  1460. mpsslog("%s restarting mic %d\n",
  1461. mic->name, mic->restart);
  1462. mic->restart = 0;
  1463. }
  1464. pthread_create(&mic->config_thread, NULL, mic_config, mic);
  1465. }
  1466. }
  1467. static void
  1468. start_daemon(void)
  1469. {
  1470. struct mic_info *mic;
  1471. for (mic = mic_list.next; mic != NULL; mic = mic->next)
  1472. init_mic(mic);
  1473. while (1)
  1474. sleep(60);
  1475. }
  1476. static int
  1477. init_mic_list(void)
  1478. {
  1479. struct mic_info *mic = &mic_list;
  1480. struct dirent *file;
  1481. DIR *dp;
  1482. int cnt = 0;
  1483. dp = opendir(MICSYSFSDIR);
  1484. if (!dp)
  1485. return 0;
  1486. while ((file = readdir(dp)) != NULL) {
  1487. if (!strncmp(file->d_name, "mic", 3)) {
  1488. mic->next = malloc(sizeof(struct mic_info));
  1489. if (mic->next) {
  1490. mic = mic->next;
  1491. mic->next = NULL;
  1492. memset(mic, 0, sizeof(struct mic_info));
  1493. mic->id = atoi(&file->d_name[3]);
  1494. mic->name = malloc(strlen(file->d_name) + 16);
  1495. if (mic->name)
  1496. strcpy(mic->name, file->d_name);
  1497. mpsslog("MIC name %s id %d\n", mic->name,
  1498. mic->id);
  1499. cnt++;
  1500. }
  1501. }
  1502. }
  1503. closedir(dp);
  1504. return cnt;
  1505. }
  1506. void
  1507. mpsslog(char *format, ...)
  1508. {
  1509. va_list args;
  1510. char buffer[4096];
  1511. char ts[52], *ts1;
  1512. time_t t;
  1513. if (logfp == NULL)
  1514. return;
  1515. va_start(args, format);
  1516. vsprintf(buffer, format, args);
  1517. va_end(args);
  1518. time(&t);
  1519. ts1 = ctime_r(&t, ts);
  1520. ts1[strlen(ts1) - 1] = '\0';
  1521. fprintf(logfp, "%s: %s", ts1, buffer);
  1522. fflush(logfp);
  1523. }
  1524. int
  1525. main(int argc, char *argv[])
  1526. {
  1527. int cnt;
  1528. pid_t pid;
  1529. myname = argv[0];
  1530. logfp = fopen(LOGFILE_NAME, "a+");
  1531. if (!logfp) {
  1532. fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
  1533. exit(1);
  1534. }
  1535. pid = fork();
  1536. switch (pid) {
  1537. case 0:
  1538. break;
  1539. case -1:
  1540. exit(2);
  1541. default:
  1542. exit(0);
  1543. }
  1544. mpsslog("MIC Daemon start\n");
  1545. cnt = init_mic_list();
  1546. if (cnt == 0) {
  1547. mpsslog("MIC module not loaded\n");
  1548. exit(3);
  1549. }
  1550. mpsslog("MIC found %d devices\n", cnt);
  1551. start_daemon();
  1552. exit(0);
  1553. }