mpssd.c 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721
  1. /*
  2. * Intel MIC Platform Software Stack (MPSS)
  3. *
  4. * Copyright(c) 2013 Intel Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License, version 2, as
  8. * published by the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * The full GNU General Public License is included in this distribution in
  16. * the file called "COPYING".
  17. *
  18. * Intel MIC User Space Tools.
  19. */
  20. #define _GNU_SOURCE
  21. #include <stdlib.h>
  22. #include <fcntl.h>
  23. #include <getopt.h>
  24. #include <assert.h>
  25. #include <unistd.h>
  26. #include <stdbool.h>
  27. #include <signal.h>
  28. #include <poll.h>
  29. #include <features.h>
  30. #include <sys/types.h>
  31. #include <sys/stat.h>
  32. #include <sys/mman.h>
  33. #include <sys/socket.h>
  34. #include <linux/virtio_ring.h>
  35. #include <linux/virtio_net.h>
  36. #include <linux/virtio_console.h>
  37. #include <linux/virtio_blk.h>
  38. #include <linux/version.h>
  39. #include "mpssd.h"
  40. #include <linux/mic_ioctl.h>
  41. #include <linux/mic_common.h>
  42. static void init_mic(struct mic_info *mic);
  43. static FILE *logfp;
  44. static struct mic_info mic_list;
  45. #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
  46. #define min_t(type, x, y) ({ \
  47. type __min1 = (x); \
  48. type __min2 = (y); \
  49. __min1 < __min2 ? __min1 : __min2; })
  50. /* align addr on a size boundary - adjust address up/down if needed */
  51. #define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1)))
  52. #define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size)
  53. /* align addr on a size boundary - adjust address up if needed */
  54. #define _ALIGN(addr, size) _ALIGN_UP(addr, size)
  55. /* to align the pointer to the (next) page boundary */
  56. #define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
  57. #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  58. #define GSO_ENABLED 1
  59. #define MAX_GSO_SIZE (64 * 1024)
  60. #define ETH_H_LEN 14
  61. #define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
  62. #define MIC_DEVICE_PAGE_END 0x1000
  63. #ifndef VIRTIO_NET_HDR_F_DATA_VALID
  64. #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
  65. #endif
  66. static struct {
  67. struct mic_device_desc dd;
  68. struct mic_vqconfig vqconfig[2];
  69. __u32 host_features, guest_acknowledgements;
  70. struct virtio_console_config cons_config;
  71. } virtcons_dev_page = {
  72. .dd = {
  73. .type = VIRTIO_ID_CONSOLE,
  74. .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
  75. .feature_len = sizeof(virtcons_dev_page.host_features),
  76. .config_len = sizeof(virtcons_dev_page.cons_config),
  77. },
  78. .vqconfig[0] = {
  79. .num = htole16(MIC_VRING_ENTRIES),
  80. },
  81. .vqconfig[1] = {
  82. .num = htole16(MIC_VRING_ENTRIES),
  83. },
  84. };
  85. static struct {
  86. struct mic_device_desc dd;
  87. struct mic_vqconfig vqconfig[2];
  88. __u32 host_features, guest_acknowledgements;
  89. struct virtio_net_config net_config;
  90. } virtnet_dev_page = {
  91. .dd = {
  92. .type = VIRTIO_ID_NET,
  93. .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
  94. .feature_len = sizeof(virtnet_dev_page.host_features),
  95. .config_len = sizeof(virtnet_dev_page.net_config),
  96. },
  97. .vqconfig[0] = {
  98. .num = htole16(MIC_VRING_ENTRIES),
  99. },
  100. .vqconfig[1] = {
  101. .num = htole16(MIC_VRING_ENTRIES),
  102. },
  103. #if GSO_ENABLED
  104. .host_features = htole32(
  105. 1 << VIRTIO_NET_F_CSUM |
  106. 1 << VIRTIO_NET_F_GSO |
  107. 1 << VIRTIO_NET_F_GUEST_TSO4 |
  108. 1 << VIRTIO_NET_F_GUEST_TSO6 |
  109. 1 << VIRTIO_NET_F_GUEST_ECN |
  110. 1 << VIRTIO_NET_F_GUEST_UFO),
  111. #else
  112. .host_features = 0,
  113. #endif
  114. };
  115. static const char *mic_config_dir = "/etc/sysconfig/mic";
  116. static const char *virtblk_backend = "VIRTBLK_BACKEND";
  117. static struct {
  118. struct mic_device_desc dd;
  119. struct mic_vqconfig vqconfig[1];
  120. __u32 host_features, guest_acknowledgements;
  121. struct virtio_blk_config blk_config;
  122. } virtblk_dev_page = {
  123. .dd = {
  124. .type = VIRTIO_ID_BLOCK,
  125. .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
  126. .feature_len = sizeof(virtblk_dev_page.host_features),
  127. .config_len = sizeof(virtblk_dev_page.blk_config),
  128. },
  129. .vqconfig[0] = {
  130. .num = htole16(MIC_VRING_ENTRIES),
  131. },
  132. .host_features =
  133. htole32(1<<VIRTIO_BLK_F_SEG_MAX),
  134. .blk_config = {
  135. .seg_max = htole32(MIC_VRING_ENTRIES - 2),
  136. .capacity = htole64(0),
  137. }
  138. };
  139. static char *myname;
  140. static int
  141. tap_configure(struct mic_info *mic, char *dev)
  142. {
  143. pid_t pid;
  144. char *ifargv[7];
  145. char ipaddr[IFNAMSIZ];
  146. int ret = 0;
  147. pid = fork();
  148. if (pid == 0) {
  149. ifargv[0] = "ip";
  150. ifargv[1] = "link";
  151. ifargv[2] = "set";
  152. ifargv[3] = dev;
  153. ifargv[4] = "up";
  154. ifargv[5] = NULL;
  155. mpsslog("Configuring %s\n", dev);
  156. ret = execvp("ip", ifargv);
  157. if (ret < 0) {
  158. mpsslog("%s execvp failed errno %s\n",
  159. mic->name, strerror(errno));
  160. return ret;
  161. }
  162. }
  163. if (pid < 0) {
  164. mpsslog("%s fork failed errno %s\n",
  165. mic->name, strerror(errno));
  166. return ret;
  167. }
  168. ret = waitpid(pid, NULL, 0);
  169. if (ret < 0) {
  170. mpsslog("%s waitpid failed errno %s\n",
  171. mic->name, strerror(errno));
  172. return ret;
  173. }
  174. snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
  175. pid = fork();
  176. if (pid == 0) {
  177. ifargv[0] = "ip";
  178. ifargv[1] = "addr";
  179. ifargv[2] = "add";
  180. ifargv[3] = ipaddr;
  181. ifargv[4] = "dev";
  182. ifargv[5] = dev;
  183. ifargv[6] = NULL;
  184. mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
  185. ret = execvp("ip", ifargv);
  186. if (ret < 0) {
  187. mpsslog("%s execvp failed errno %s\n",
  188. mic->name, strerror(errno));
  189. return ret;
  190. }
  191. }
  192. if (pid < 0) {
  193. mpsslog("%s fork failed errno %s\n",
  194. mic->name, strerror(errno));
  195. return ret;
  196. }
  197. ret = waitpid(pid, NULL, 0);
  198. if (ret < 0) {
  199. mpsslog("%s waitpid failed errno %s\n",
  200. mic->name, strerror(errno));
  201. return ret;
  202. }
  203. mpsslog("MIC name %s %s %d DONE!\n",
  204. mic->name, __func__, __LINE__);
  205. return 0;
  206. }
  207. static int tun_alloc(struct mic_info *mic, char *dev)
  208. {
  209. struct ifreq ifr;
  210. int fd, err;
  211. #if GSO_ENABLED
  212. unsigned offload;
  213. #endif
  214. fd = open("/dev/net/tun", O_RDWR);
  215. if (fd < 0) {
  216. mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
  217. goto done;
  218. }
  219. memset(&ifr, 0, sizeof(ifr));
  220. ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
  221. if (*dev)
  222. strncpy(ifr.ifr_name, dev, IFNAMSIZ);
  223. err = ioctl(fd, TUNSETIFF, (void *)&ifr);
  224. if (err < 0) {
  225. mpsslog("%s %s %d TUNSETIFF failed %s\n",
  226. mic->name, __func__, __LINE__, strerror(errno));
  227. close(fd);
  228. return err;
  229. }
  230. #if GSO_ENABLED
  231. offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
  232. TUN_F_TSO_ECN | TUN_F_UFO;
  233. err = ioctl(fd, TUNSETOFFLOAD, offload);
  234. if (err < 0) {
  235. mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
  236. mic->name, __func__, __LINE__, strerror(errno));
  237. close(fd);
  238. return err;
  239. }
  240. #endif
  241. strcpy(dev, ifr.ifr_name);
  242. mpsslog("Created TAP %s\n", dev);
  243. done:
  244. return fd;
  245. }
  246. #define NET_FD_VIRTIO_NET 0
  247. #define NET_FD_TUN 1
  248. #define MAX_NET_FD 2
  249. static void set_dp(struct mic_info *mic, int type, void *dp)
  250. {
  251. switch (type) {
  252. case VIRTIO_ID_CONSOLE:
  253. mic->mic_console.console_dp = dp;
  254. return;
  255. case VIRTIO_ID_NET:
  256. mic->mic_net.net_dp = dp;
  257. return;
  258. case VIRTIO_ID_BLOCK:
  259. mic->mic_virtblk.block_dp = dp;
  260. return;
  261. }
  262. mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  263. assert(0);
  264. }
  265. static void *get_dp(struct mic_info *mic, int type)
  266. {
  267. switch (type) {
  268. case VIRTIO_ID_CONSOLE:
  269. return mic->mic_console.console_dp;
  270. case VIRTIO_ID_NET:
  271. return mic->mic_net.net_dp;
  272. case VIRTIO_ID_BLOCK:
  273. return mic->mic_virtblk.block_dp;
  274. }
  275. mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  276. assert(0);
  277. return NULL;
  278. }
  279. static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
  280. {
  281. struct mic_device_desc *d;
  282. int i;
  283. void *dp = get_dp(mic, type);
  284. for (i = mic_aligned_size(struct mic_bootparam); i < PAGE_SIZE;
  285. i += mic_total_desc_size(d)) {
  286. d = dp + i;
  287. /* End of list */
  288. if (d->type == 0)
  289. break;
  290. if (d->type == -1)
  291. continue;
  292. mpsslog("%s %s d-> type %d d %p\n",
  293. mic->name, __func__, d->type, d);
  294. if (d->type == (__u8)type)
  295. return d;
  296. }
  297. mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  298. assert(0);
  299. return NULL;
  300. }
  301. /* See comments in vhost.c for explanation of next_desc() */
  302. static unsigned next_desc(struct vring_desc *desc)
  303. {
  304. unsigned int next;
  305. if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
  306. return -1U;
  307. next = le16toh(desc->next);
  308. return next;
  309. }
  310. /* Sum up all the IOVEC length */
  311. static ssize_t
  312. sum_iovec_len(struct mic_copy_desc *copy)
  313. {
  314. ssize_t sum = 0;
  315. int i;
  316. for (i = 0; i < copy->iovcnt; i++)
  317. sum += copy->iov[i].iov_len;
  318. return sum;
  319. }
  320. static inline void verify_out_len(struct mic_info *mic,
  321. struct mic_copy_desc *copy)
  322. {
  323. if (copy->out_len != sum_iovec_len(copy)) {
  324. mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
  325. mic->name, __func__, __LINE__,
  326. copy->out_len, sum_iovec_len(copy));
  327. assert(copy->out_len == sum_iovec_len(copy));
  328. }
  329. }
  330. /* Display an iovec */
  331. static void
  332. disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
  333. const char *s, int line)
  334. {
  335. int i;
  336. for (i = 0; i < copy->iovcnt; i++)
  337. mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
  338. mic->name, s, line, i,
  339. copy->iov[i].iov_base, copy->iov[i].iov_len);
  340. }
  341. static inline __u16 read_avail_idx(struct mic_vring *vr)
  342. {
  343. return ACCESS_ONCE(vr->info->avail_idx);
  344. }
  345. static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
  346. struct mic_copy_desc *copy, ssize_t len)
  347. {
  348. copy->vr_idx = tx ? 0 : 1;
  349. copy->update_used = true;
  350. if (type == VIRTIO_ID_NET)
  351. copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
  352. else
  353. copy->iov[0].iov_len = len;
  354. }
  355. /* Central API which triggers the copies */
  356. static int
  357. mic_virtio_copy(struct mic_info *mic, int fd,
  358. struct mic_vring *vr, struct mic_copy_desc *copy)
  359. {
  360. int ret;
  361. ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
  362. if (ret) {
  363. mpsslog("%s %s %d errno %s ret %d\n",
  364. mic->name, __func__, __LINE__,
  365. strerror(errno), ret);
  366. }
  367. return ret;
  368. }
  369. /*
  370. * This initialization routine requires at least one
  371. * vring i.e. vr0. vr1 is optional.
  372. */
  373. static void *
  374. init_vr(struct mic_info *mic, int fd, int type,
  375. struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
  376. {
  377. int vr_size;
  378. char *va;
  379. vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
  380. MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
  381. va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
  382. PROT_READ, MAP_SHARED, fd, 0);
  383. if (MAP_FAILED == va) {
  384. mpsslog("%s %s %d mmap failed errno %s\n",
  385. mic->name, __func__, __LINE__,
  386. strerror(errno));
  387. goto done;
  388. }
  389. set_dp(mic, type, va);
  390. vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
  391. vr0->info = vr0->va +
  392. vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
  393. vring_init(&vr0->vr,
  394. MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
  395. mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
  396. __func__, mic->name, vr0->va, vr0->info, vr_size,
  397. vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
  398. mpsslog("magic 0x%x expected 0x%x\n",
  399. vr0->info->magic, MIC_MAGIC + type);
  400. assert(vr0->info->magic == MIC_MAGIC + type);
  401. if (vr1) {
  402. vr1->va = (struct mic_vring *)
  403. &va[MIC_DEVICE_PAGE_END + vr_size];
  404. vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
  405. MIC_VIRTIO_RING_ALIGN);
  406. vring_init(&vr1->vr,
  407. MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
  408. mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
  409. __func__, mic->name, vr1->va, vr1->info, vr_size,
  410. vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
  411. mpsslog("magic 0x%x expected 0x%x\n",
  412. vr1->info->magic, MIC_MAGIC + type + 1);
  413. assert(vr1->info->magic == MIC_MAGIC + type + 1);
  414. }
  415. done:
  416. return va;
  417. }
  418. static void
  419. wait_for_card_driver(struct mic_info *mic, int fd, int type)
  420. {
  421. struct pollfd pollfd;
  422. int err;
  423. struct mic_device_desc *desc = get_device_desc(mic, type);
  424. pollfd.fd = fd;
  425. mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
  426. mic->name, __func__, type, desc->status);
  427. while (1) {
  428. pollfd.events = POLLIN;
  429. pollfd.revents = 0;
  430. err = poll(&pollfd, 1, -1);
  431. if (err < 0) {
  432. mpsslog("%s %s poll failed %s\n",
  433. mic->name, __func__, strerror(errno));
  434. continue;
  435. }
  436. if (pollfd.revents) {
  437. mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
  438. mic->name, __func__, type, desc->status);
  439. if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
  440. mpsslog("%s %s poll.revents %d\n",
  441. mic->name, __func__, pollfd.revents);
  442. mpsslog("%s %s desc-> type %d status 0x%x\n",
  443. mic->name, __func__, type,
  444. desc->status);
  445. break;
  446. }
  447. }
  448. }
  449. }
  450. /* Spin till we have some descriptors */
  451. static void
  452. spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
  453. {
  454. __u16 avail_idx = read_avail_idx(vr);
  455. while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
  456. #ifdef DEBUG
  457. mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
  458. mic->name, __func__,
  459. le16toh(vr->vr.avail->idx), vr->info->avail_idx);
  460. #endif
  461. sched_yield();
  462. }
  463. }
  464. static void *
  465. virtio_net(void *arg)
  466. {
  467. static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
  468. static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __aligned(64);
  469. struct iovec vnet_iov[2][2] = {
  470. { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
  471. { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
  472. { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
  473. { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
  474. };
  475. struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
  476. struct mic_info *mic = (struct mic_info *)arg;
  477. char if_name[IFNAMSIZ];
  478. struct pollfd net_poll[MAX_NET_FD];
  479. struct mic_vring tx_vr, rx_vr;
  480. struct mic_copy_desc copy;
  481. struct mic_device_desc *desc;
  482. int err;
  483. snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
  484. mic->mic_net.tap_fd = tun_alloc(mic, if_name);
  485. if (mic->mic_net.tap_fd < 0)
  486. goto done;
  487. if (tap_configure(mic, if_name))
  488. goto done;
  489. mpsslog("MIC name %s id %d\n", mic->name, mic->id);
  490. net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
  491. net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
  492. net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
  493. net_poll[NET_FD_TUN].events = POLLIN;
  494. if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
  495. VIRTIO_ID_NET, &tx_vr, &rx_vr,
  496. virtnet_dev_page.dd.num_vq)) {
  497. mpsslog("%s init_vr failed %s\n",
  498. mic->name, strerror(errno));
  499. goto done;
  500. }
  501. copy.iovcnt = 2;
  502. desc = get_device_desc(mic, VIRTIO_ID_NET);
  503. while (1) {
  504. ssize_t len;
  505. net_poll[NET_FD_VIRTIO_NET].revents = 0;
  506. net_poll[NET_FD_TUN].revents = 0;
  507. /* Start polling for data from tap and virtio net */
  508. err = poll(net_poll, 2, -1);
  509. if (err < 0) {
  510. mpsslog("%s poll failed %s\n",
  511. __func__, strerror(errno));
  512. continue;
  513. }
  514. if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
  515. wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
  516. VIRTIO_ID_NET);
  517. /*
  518. * Check if there is data to be read from TUN and write to
  519. * virtio net fd if there is.
  520. */
  521. if (net_poll[NET_FD_TUN].revents & POLLIN) {
  522. copy.iov = iov0;
  523. len = readv(net_poll[NET_FD_TUN].fd,
  524. copy.iov, copy.iovcnt);
  525. if (len > 0) {
  526. struct virtio_net_hdr *hdr
  527. = (struct virtio_net_hdr *)vnet_hdr[0];
  528. /* Disable checksums on the card since we are on
  529. a reliable PCIe link */
  530. hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
  531. #ifdef DEBUG
  532. mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
  533. __func__, __LINE__, hdr->flags);
  534. mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
  535. copy.out_len, hdr->gso_type);
  536. #endif
  537. #ifdef DEBUG
  538. disp_iovec(mic, copy, __func__, __LINE__);
  539. mpsslog("%s %s %d read from tap 0x%lx\n",
  540. mic->name, __func__, __LINE__,
  541. len);
  542. #endif
  543. spin_for_descriptors(mic, &tx_vr);
  544. txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
  545. len);
  546. err = mic_virtio_copy(mic,
  547. mic->mic_net.virtio_net_fd, &tx_vr,
  548. &copy);
  549. if (err < 0) {
  550. mpsslog("%s %s %d mic_virtio_copy %s\n",
  551. mic->name, __func__, __LINE__,
  552. strerror(errno));
  553. }
  554. if (!err)
  555. verify_out_len(mic, &copy);
  556. #ifdef DEBUG
  557. disp_iovec(mic, copy, __func__, __LINE__);
  558. mpsslog("%s %s %d wrote to net 0x%lx\n",
  559. mic->name, __func__, __LINE__,
  560. sum_iovec_len(&copy));
  561. #endif
  562. /* Reinitialize IOV for next run */
  563. iov0[1].iov_len = MAX_NET_PKT_SIZE;
  564. } else if (len < 0) {
  565. disp_iovec(mic, &copy, __func__, __LINE__);
  566. mpsslog("%s %s %d read failed %s ", mic->name,
  567. __func__, __LINE__, strerror(errno));
  568. mpsslog("cnt %d sum %zd\n",
  569. copy.iovcnt, sum_iovec_len(&copy));
  570. }
  571. }
  572. /*
  573. * Check if there is data to be read from virtio net and
  574. * write to TUN if there is.
  575. */
  576. if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
  577. while (rx_vr.info->avail_idx !=
  578. le16toh(rx_vr.vr.avail->idx)) {
  579. copy.iov = iov1;
  580. txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
  581. MAX_NET_PKT_SIZE
  582. + sizeof(struct virtio_net_hdr));
  583. err = mic_virtio_copy(mic,
  584. mic->mic_net.virtio_net_fd, &rx_vr,
  585. &copy);
  586. if (!err) {
  587. #ifdef DEBUG
  588. struct virtio_net_hdr *hdr
  589. = (struct virtio_net_hdr *)
  590. vnet_hdr[1];
  591. mpsslog("%s %s %d hdr->flags 0x%x, ",
  592. mic->name, __func__, __LINE__,
  593. hdr->flags);
  594. mpsslog("out_len %d gso_type 0x%x\n",
  595. copy.out_len,
  596. hdr->gso_type);
  597. #endif
  598. /* Set the correct output iov_len */
  599. iov1[1].iov_len = copy.out_len -
  600. sizeof(struct virtio_net_hdr);
  601. verify_out_len(mic, &copy);
  602. #ifdef DEBUG
  603. disp_iovec(mic, copy, __func__,
  604. __LINE__);
  605. mpsslog("%s %s %d ",
  606. mic->name, __func__, __LINE__);
  607. mpsslog("read from net 0x%lx\n",
  608. sum_iovec_len(copy));
  609. #endif
  610. len = writev(net_poll[NET_FD_TUN].fd,
  611. copy.iov, copy.iovcnt);
  612. if (len != sum_iovec_len(&copy)) {
  613. mpsslog("Tun write failed %s ",
  614. strerror(errno));
  615. mpsslog("len 0x%zx ", len);
  616. mpsslog("read_len 0x%zx\n",
  617. sum_iovec_len(&copy));
  618. } else {
  619. #ifdef DEBUG
  620. disp_iovec(mic, &copy, __func__,
  621. __LINE__);
  622. mpsslog("%s %s %d ",
  623. mic->name, __func__,
  624. __LINE__);
  625. mpsslog("wrote to tap 0x%lx\n",
  626. len);
  627. #endif
  628. }
  629. } else {
  630. mpsslog("%s %s %d mic_virtio_copy %s\n",
  631. mic->name, __func__, __LINE__,
  632. strerror(errno));
  633. break;
  634. }
  635. }
  636. }
  637. if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
  638. mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
  639. }
  640. done:
  641. pthread_exit(NULL);
  642. }
  643. /* virtio_console */
  644. #define VIRTIO_CONSOLE_FD 0
  645. #define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
  646. #define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
  647. #define MAX_BUFFER_SIZE PAGE_SIZE
  648. static void *
  649. virtio_console(void *arg)
  650. {
  651. static __u8 vcons_buf[2][PAGE_SIZE];
  652. struct iovec vcons_iov[2] = {
  653. { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
  654. { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
  655. };
  656. struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
  657. struct mic_info *mic = (struct mic_info *)arg;
  658. int err;
  659. struct pollfd console_poll[MAX_CONSOLE_FD];
  660. int pty_fd;
  661. char *pts_name;
  662. ssize_t len;
  663. struct mic_vring tx_vr, rx_vr;
  664. struct mic_copy_desc copy;
  665. struct mic_device_desc *desc;
  666. pty_fd = posix_openpt(O_RDWR);
  667. if (pty_fd < 0) {
  668. mpsslog("can't open a pseudoterminal master device: %s\n",
  669. strerror(errno));
  670. goto _return;
  671. }
  672. pts_name = ptsname(pty_fd);
  673. if (pts_name == NULL) {
  674. mpsslog("can't get pts name\n");
  675. goto _close_pty;
  676. }
  677. printf("%s console message goes to %s\n", mic->name, pts_name);
  678. mpsslog("%s console message goes to %s\n", mic->name, pts_name);
  679. err = grantpt(pty_fd);
  680. if (err < 0) {
  681. mpsslog("can't grant access: %s %s\n",
  682. pts_name, strerror(errno));
  683. goto _close_pty;
  684. }
  685. err = unlockpt(pty_fd);
  686. if (err < 0) {
  687. mpsslog("can't unlock a pseudoterminal: %s %s\n",
  688. pts_name, strerror(errno));
  689. goto _close_pty;
  690. }
  691. console_poll[MONITOR_FD].fd = pty_fd;
  692. console_poll[MONITOR_FD].events = POLLIN;
  693. console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
  694. console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
  695. if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
  696. VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
  697. virtcons_dev_page.dd.num_vq)) {
  698. mpsslog("%s init_vr failed %s\n",
  699. mic->name, strerror(errno));
  700. goto _close_pty;
  701. }
  702. copy.iovcnt = 1;
  703. desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
  704. for (;;) {
  705. console_poll[MONITOR_FD].revents = 0;
  706. console_poll[VIRTIO_CONSOLE_FD].revents = 0;
  707. err = poll(console_poll, MAX_CONSOLE_FD, -1);
  708. if (err < 0) {
  709. mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
  710. strerror(errno));
  711. continue;
  712. }
  713. if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
  714. wait_for_card_driver(mic,
  715. mic->mic_console.virtio_console_fd,
  716. VIRTIO_ID_CONSOLE);
  717. if (console_poll[MONITOR_FD].revents & POLLIN) {
  718. copy.iov = iov0;
  719. len = readv(pty_fd, copy.iov, copy.iovcnt);
  720. if (len > 0) {
  721. #ifdef DEBUG
  722. disp_iovec(mic, copy, __func__, __LINE__);
  723. mpsslog("%s %s %d read from tap 0x%lx\n",
  724. mic->name, __func__, __LINE__,
  725. len);
  726. #endif
  727. spin_for_descriptors(mic, &tx_vr);
  728. txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
  729. &copy, len);
  730. err = mic_virtio_copy(mic,
  731. mic->mic_console.virtio_console_fd,
  732. &tx_vr, &copy);
  733. if (err < 0) {
  734. mpsslog("%s %s %d mic_virtio_copy %s\n",
  735. mic->name, __func__, __LINE__,
  736. strerror(errno));
  737. }
  738. if (!err)
  739. verify_out_len(mic, &copy);
  740. #ifdef DEBUG
  741. disp_iovec(mic, copy, __func__, __LINE__);
  742. mpsslog("%s %s %d wrote to net 0x%lx\n",
  743. mic->name, __func__, __LINE__,
  744. sum_iovec_len(copy));
  745. #endif
  746. /* Reinitialize IOV for next run */
  747. iov0->iov_len = PAGE_SIZE;
  748. } else if (len < 0) {
  749. disp_iovec(mic, &copy, __func__, __LINE__);
  750. mpsslog("%s %s %d read failed %s ",
  751. mic->name, __func__, __LINE__,
  752. strerror(errno));
  753. mpsslog("cnt %d sum %zd\n",
  754. copy.iovcnt, sum_iovec_len(&copy));
  755. }
  756. }
  757. if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
  758. while (rx_vr.info->avail_idx !=
  759. le16toh(rx_vr.vr.avail->idx)) {
  760. copy.iov = iov1;
  761. txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
  762. &copy, PAGE_SIZE);
  763. err = mic_virtio_copy(mic,
  764. mic->mic_console.virtio_console_fd,
  765. &rx_vr, &copy);
  766. if (!err) {
  767. /* Set the correct output iov_len */
  768. iov1->iov_len = copy.out_len;
  769. verify_out_len(mic, &copy);
  770. #ifdef DEBUG
  771. disp_iovec(mic, copy, __func__,
  772. __LINE__);
  773. mpsslog("%s %s %d ",
  774. mic->name, __func__, __LINE__);
  775. mpsslog("read from net 0x%lx\n",
  776. sum_iovec_len(copy));
  777. #endif
  778. len = writev(pty_fd,
  779. copy.iov, copy.iovcnt);
  780. if (len != sum_iovec_len(&copy)) {
  781. mpsslog("Tun write failed %s ",
  782. strerror(errno));
  783. mpsslog("len 0x%zx ", len);
  784. mpsslog("read_len 0x%zx\n",
  785. sum_iovec_len(&copy));
  786. } else {
  787. #ifdef DEBUG
  788. disp_iovec(mic, copy, __func__,
  789. __LINE__);
  790. mpsslog("%s %s %d ",
  791. mic->name, __func__,
  792. __LINE__);
  793. mpsslog("wrote to tap 0x%lx\n",
  794. len);
  795. #endif
  796. }
  797. } else {
  798. mpsslog("%s %s %d mic_virtio_copy %s\n",
  799. mic->name, __func__, __LINE__,
  800. strerror(errno));
  801. break;
  802. }
  803. }
  804. }
  805. if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
  806. mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
  807. }
  808. _close_pty:
  809. close(pty_fd);
  810. _return:
  811. pthread_exit(NULL);
  812. }
  813. static void
  814. add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
  815. {
  816. char path[PATH_MAX];
  817. int fd, err;
  818. snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
  819. fd = open(path, O_RDWR);
  820. if (fd < 0) {
  821. mpsslog("Could not open %s %s\n", path, strerror(errno));
  822. return;
  823. }
  824. err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
  825. if (err < 0) {
  826. mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
  827. close(fd);
  828. return;
  829. }
  830. switch (dd->type) {
  831. case VIRTIO_ID_NET:
  832. mic->mic_net.virtio_net_fd = fd;
  833. mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
  834. break;
  835. case VIRTIO_ID_CONSOLE:
  836. mic->mic_console.virtio_console_fd = fd;
  837. mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
  838. break;
  839. case VIRTIO_ID_BLOCK:
  840. mic->mic_virtblk.virtio_block_fd = fd;
  841. mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
  842. break;
  843. }
  844. }
  845. static bool
  846. set_backend_file(struct mic_info *mic)
  847. {
  848. FILE *config;
  849. char buff[PATH_MAX], *line, *evv, *p;
  850. snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
  851. config = fopen(buff, "r");
  852. if (config == NULL)
  853. return false;
  854. do { /* look for "virtblk_backend=XXXX" */
  855. line = fgets(buff, PATH_MAX, config);
  856. if (line == NULL)
  857. break;
  858. if (*line == '#')
  859. continue;
  860. p = strchr(line, '\n');
  861. if (p)
  862. *p = '\0';
  863. } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
  864. fclose(config);
  865. if (line == NULL)
  866. return false;
  867. evv = strchr(line, '=');
  868. if (evv == NULL)
  869. return false;
  870. mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
  871. if (mic->mic_virtblk.backend_file == NULL) {
  872. mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
  873. return false;
  874. }
  875. strcpy(mic->mic_virtblk.backend_file, evv + 1);
  876. return true;
  877. }
  878. #define SECTOR_SIZE 512
  879. static bool
  880. set_backend_size(struct mic_info *mic)
  881. {
  882. mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
  883. SEEK_END);
  884. if (mic->mic_virtblk.backend_size < 0) {
  885. mpsslog("%s: can't seek: %s\n",
  886. mic->name, mic->mic_virtblk.backend_file);
  887. return false;
  888. }
  889. virtblk_dev_page.blk_config.capacity =
  890. mic->mic_virtblk.backend_size / SECTOR_SIZE;
  891. if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
  892. virtblk_dev_page.blk_config.capacity++;
  893. virtblk_dev_page.blk_config.capacity =
  894. htole64(virtblk_dev_page.blk_config.capacity);
  895. return true;
  896. }
  897. static bool
  898. open_backend(struct mic_info *mic)
  899. {
  900. if (!set_backend_file(mic))
  901. goto _error_exit;
  902. mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
  903. if (mic->mic_virtblk.backend < 0) {
  904. mpsslog("%s: can't open: %s\n", mic->name,
  905. mic->mic_virtblk.backend_file);
  906. goto _error_free;
  907. }
  908. if (!set_backend_size(mic))
  909. goto _error_close;
  910. mic->mic_virtblk.backend_addr = mmap(NULL,
  911. mic->mic_virtblk.backend_size,
  912. PROT_READ|PROT_WRITE, MAP_SHARED,
  913. mic->mic_virtblk.backend, 0L);
  914. if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
  915. mpsslog("%s: can't map: %s %s\n",
  916. mic->name, mic->mic_virtblk.backend_file,
  917. strerror(errno));
  918. goto _error_close;
  919. }
  920. return true;
  921. _error_close:
  922. close(mic->mic_virtblk.backend);
  923. _error_free:
  924. free(mic->mic_virtblk.backend_file);
  925. _error_exit:
  926. return false;
  927. }
  928. static void
  929. close_backend(struct mic_info *mic)
  930. {
  931. munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
  932. close(mic->mic_virtblk.backend);
  933. free(mic->mic_virtblk.backend_file);
  934. }
  935. static bool
  936. start_virtblk(struct mic_info *mic, struct mic_vring *vring)
  937. {
  938. if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
  939. mpsslog("%s: blk_config is not 8 byte aligned.\n",
  940. mic->name);
  941. return false;
  942. }
  943. add_virtio_device(mic, &virtblk_dev_page.dd);
  944. if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
  945. VIRTIO_ID_BLOCK, vring, NULL,
  946. virtblk_dev_page.dd.num_vq)) {
  947. mpsslog("%s init_vr failed %s\n",
  948. mic->name, strerror(errno));
  949. return false;
  950. }
  951. return true;
  952. }
  953. static void
  954. stop_virtblk(struct mic_info *mic)
  955. {
  956. int vr_size, ret;
  957. vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
  958. MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
  959. ret = munmap(mic->mic_virtblk.block_dp,
  960. MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
  961. if (ret < 0)
  962. mpsslog("%s munmap errno %d\n", mic->name, errno);
  963. close(mic->mic_virtblk.virtio_block_fd);
  964. }
  965. static __u8
  966. header_error_check(struct vring_desc *desc)
  967. {
  968. if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
  969. mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
  970. __func__, __LINE__);
  971. return -EIO;
  972. }
  973. if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
  974. mpsslog("%s() %d: alone\n",
  975. __func__, __LINE__);
  976. return -EIO;
  977. }
  978. if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
  979. mpsslog("%s() %d: not read\n",
  980. __func__, __LINE__);
  981. return -EIO;
  982. }
  983. return 0;
  984. }
  985. static int
  986. read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
  987. {
  988. struct iovec iovec;
  989. struct mic_copy_desc copy;
  990. iovec.iov_len = sizeof(*hdr);
  991. iovec.iov_base = hdr;
  992. copy.iov = &iovec;
  993. copy.iovcnt = 1;
  994. copy.vr_idx = 0; /* only one vring on virtio_block */
  995. copy.update_used = false; /* do not update used index */
  996. return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  997. }
  998. static int
  999. transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
  1000. {
  1001. struct mic_copy_desc copy;
  1002. copy.iov = iovec;
  1003. copy.iovcnt = iovcnt;
  1004. copy.vr_idx = 0; /* only one vring on virtio_block */
  1005. copy.update_used = false; /* do not update used index */
  1006. return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  1007. }
  1008. static __u8
  1009. status_error_check(struct vring_desc *desc)
  1010. {
  1011. if (le32toh(desc->len) != sizeof(__u8)) {
  1012. mpsslog("%s() %d: length is not sizeof(status)\n",
  1013. __func__, __LINE__);
  1014. return -EIO;
  1015. }
  1016. return 0;
  1017. }
  1018. static int
  1019. write_status(int fd, __u8 *status)
  1020. {
  1021. struct iovec iovec;
  1022. struct mic_copy_desc copy;
  1023. iovec.iov_base = status;
  1024. iovec.iov_len = sizeof(*status);
  1025. copy.iov = &iovec;
  1026. copy.iovcnt = 1;
  1027. copy.vr_idx = 0; /* only one vring on virtio_block */
  1028. copy.update_used = true; /* Update used index */
  1029. return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  1030. }
  1031. static void *
  1032. virtio_block(void *arg)
  1033. {
  1034. struct mic_info *mic = (struct mic_info *)arg;
  1035. int ret;
  1036. struct pollfd block_poll;
  1037. struct mic_vring vring;
  1038. __u16 avail_idx;
  1039. __u32 desc_idx;
  1040. struct vring_desc *desc;
  1041. struct iovec *iovec, *piov;
  1042. __u8 status;
  1043. __u32 buffer_desc_idx;
  1044. struct virtio_blk_outhdr hdr;
  1045. void *fos;
  1046. for (;;) { /* forever */
  1047. if (!open_backend(mic)) { /* No virtblk */
  1048. for (mic->mic_virtblk.signaled = 0;
  1049. !mic->mic_virtblk.signaled;)
  1050. sleep(1);
  1051. continue;
  1052. }
  1053. /* backend file is specified. */
  1054. if (!start_virtblk(mic, &vring))
  1055. goto _close_backend;
  1056. iovec = malloc(sizeof(*iovec) *
  1057. le32toh(virtblk_dev_page.blk_config.seg_max));
  1058. if (!iovec) {
  1059. mpsslog("%s: can't alloc iovec: %s\n",
  1060. mic->name, strerror(ENOMEM));
  1061. goto _stop_virtblk;
  1062. }
  1063. block_poll.fd = mic->mic_virtblk.virtio_block_fd;
  1064. block_poll.events = POLLIN;
  1065. for (mic->mic_virtblk.signaled = 0;
  1066. !mic->mic_virtblk.signaled;) {
  1067. block_poll.revents = 0;
  1068. /* timeout in 1 sec to see signaled */
  1069. ret = poll(&block_poll, 1, 1000);
  1070. if (ret < 0) {
  1071. mpsslog("%s %d: poll failed: %s\n",
  1072. __func__, __LINE__,
  1073. strerror(errno));
  1074. continue;
  1075. }
  1076. if (!(block_poll.revents & POLLIN)) {
  1077. #ifdef DEBUG
  1078. mpsslog("%s %d: block_poll.revents=0x%x\n",
  1079. __func__, __LINE__, block_poll.revents);
  1080. #endif
  1081. continue;
  1082. }
  1083. /* POLLIN */
  1084. while (vring.info->avail_idx !=
  1085. le16toh(vring.vr.avail->idx)) {
  1086. /* read header element */
  1087. avail_idx =
  1088. vring.info->avail_idx &
  1089. (vring.vr.num - 1);
  1090. desc_idx = le16toh(
  1091. vring.vr.avail->ring[avail_idx]);
  1092. desc = &vring.vr.desc[desc_idx];
  1093. #ifdef DEBUG
  1094. mpsslog("%s() %d: avail_idx=%d ",
  1095. __func__, __LINE__,
  1096. vring.info->avail_idx);
  1097. mpsslog("vring.vr.num=%d desc=%p\n",
  1098. vring.vr.num, desc);
  1099. #endif
  1100. status = header_error_check(desc);
  1101. ret = read_header(
  1102. mic->mic_virtblk.virtio_block_fd,
  1103. &hdr, desc_idx);
  1104. if (ret < 0) {
  1105. mpsslog("%s() %d %s: ret=%d %s\n",
  1106. __func__, __LINE__,
  1107. mic->name, ret,
  1108. strerror(errno));
  1109. break;
  1110. }
  1111. /* buffer element */
  1112. piov = iovec;
  1113. status = 0;
  1114. fos = mic->mic_virtblk.backend_addr +
  1115. (hdr.sector * SECTOR_SIZE);
  1116. buffer_desc_idx = next_desc(desc);
  1117. desc_idx = buffer_desc_idx;
  1118. for (desc = &vring.vr.desc[buffer_desc_idx];
  1119. desc->flags & VRING_DESC_F_NEXT;
  1120. desc_idx = next_desc(desc),
  1121. desc = &vring.vr.desc[desc_idx]) {
  1122. piov->iov_len = desc->len;
  1123. piov->iov_base = fos;
  1124. piov++;
  1125. fos += desc->len;
  1126. }
  1127. /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
  1128. if (hdr.type & ~(VIRTIO_BLK_T_OUT |
  1129. VIRTIO_BLK_T_GET_ID)) {
  1130. /*
  1131. VIRTIO_BLK_T_IN - does not do
  1132. anything. Probably for documenting.
  1133. VIRTIO_BLK_T_SCSI_CMD - for
  1134. virtio_scsi.
  1135. VIRTIO_BLK_T_FLUSH - turned off in
  1136. config space.
  1137. VIRTIO_BLK_T_BARRIER - defined but not
  1138. used in anywhere.
  1139. */
  1140. mpsslog("%s() %d: type %x ",
  1141. __func__, __LINE__,
  1142. hdr.type);
  1143. mpsslog("is not supported\n");
  1144. status = -ENOTSUP;
  1145. } else {
  1146. ret = transfer_blocks(
  1147. mic->mic_virtblk.virtio_block_fd,
  1148. iovec,
  1149. piov - iovec);
  1150. if (ret < 0 &&
  1151. status != 0)
  1152. status = ret;
  1153. }
  1154. /* write status and update used pointer */
  1155. if (status != 0)
  1156. status = status_error_check(desc);
  1157. ret = write_status(
  1158. mic->mic_virtblk.virtio_block_fd,
  1159. &status);
  1160. #ifdef DEBUG
  1161. mpsslog("%s() %d: write status=%d on desc=%p\n",
  1162. __func__, __LINE__,
  1163. status, desc);
  1164. #endif
  1165. }
  1166. }
  1167. free(iovec);
  1168. _stop_virtblk:
  1169. stop_virtblk(mic);
  1170. _close_backend:
  1171. close_backend(mic);
  1172. } /* forever */
  1173. pthread_exit(NULL);
  1174. }
  1175. static void
  1176. reset(struct mic_info *mic)
  1177. {
  1178. #define RESET_TIMEOUT 120
  1179. int i = RESET_TIMEOUT;
  1180. setsysfs(mic->name, "state", "reset");
  1181. while (i) {
  1182. char *state;
  1183. state = readsysfs(mic->name, "state");
  1184. if (!state)
  1185. goto retry;
  1186. mpsslog("%s: %s %d state %s\n",
  1187. mic->name, __func__, __LINE__, state);
  1188. /*
  1189. * If the shutdown was initiated by OSPM, the state stays
  1190. * in "suspended" which is also a valid condition for reset.
  1191. */
  1192. if ((!strcmp(state, "offline")) ||
  1193. (!strcmp(state, "suspended"))) {
  1194. free(state);
  1195. break;
  1196. }
  1197. free(state);
  1198. retry:
  1199. sleep(1);
  1200. i--;
  1201. }
  1202. }
  1203. static int
  1204. get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
  1205. {
  1206. if (!strcmp(shutdown_status, "nop"))
  1207. return MIC_NOP;
  1208. if (!strcmp(shutdown_status, "crashed"))
  1209. return MIC_CRASHED;
  1210. if (!strcmp(shutdown_status, "halted"))
  1211. return MIC_HALTED;
  1212. if (!strcmp(shutdown_status, "poweroff"))
  1213. return MIC_POWER_OFF;
  1214. if (!strcmp(shutdown_status, "restart"))
  1215. return MIC_RESTART;
  1216. mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
  1217. /* Invalid state */
  1218. assert(0);
  1219. };
  1220. static int get_mic_state(struct mic_info *mic, char *state)
  1221. {
  1222. if (!strcmp(state, "offline"))
  1223. return MIC_OFFLINE;
  1224. if (!strcmp(state, "online"))
  1225. return MIC_ONLINE;
  1226. if (!strcmp(state, "shutting_down"))
  1227. return MIC_SHUTTING_DOWN;
  1228. if (!strcmp(state, "reset_failed"))
  1229. return MIC_RESET_FAILED;
  1230. if (!strcmp(state, "suspending"))
  1231. return MIC_SUSPENDING;
  1232. if (!strcmp(state, "suspended"))
  1233. return MIC_SUSPENDED;
  1234. mpsslog("%s: BUG invalid state %s\n", mic->name, state);
  1235. /* Invalid state */
  1236. assert(0);
  1237. };
  1238. static void mic_handle_shutdown(struct mic_info *mic)
  1239. {
  1240. #define SHUTDOWN_TIMEOUT 60
  1241. int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
  1242. char *shutdown_status;
  1243. while (i) {
  1244. shutdown_status = readsysfs(mic->name, "shutdown_status");
  1245. if (!shutdown_status)
  1246. continue;
  1247. mpsslog("%s: %s %d shutdown_status %s\n",
  1248. mic->name, __func__, __LINE__, shutdown_status);
  1249. switch (get_mic_shutdown_status(mic, shutdown_status)) {
  1250. case MIC_RESTART:
  1251. mic->restart = 1;
  1252. case MIC_HALTED:
  1253. case MIC_POWER_OFF:
  1254. case MIC_CRASHED:
  1255. free(shutdown_status);
  1256. goto reset;
  1257. default:
  1258. break;
  1259. }
  1260. free(shutdown_status);
  1261. sleep(1);
  1262. i--;
  1263. }
  1264. reset:
  1265. ret = kill(mic->pid, SIGTERM);
  1266. mpsslog("%s: %s %d kill pid %d ret %d\n",
  1267. mic->name, __func__, __LINE__,
  1268. mic->pid, ret);
  1269. if (!ret) {
  1270. ret = waitpid(mic->pid, &stat,
  1271. WIFSIGNALED(stat));
  1272. mpsslog("%s: %s %d waitpid ret %d pid %d\n",
  1273. mic->name, __func__, __LINE__,
  1274. ret, mic->pid);
  1275. }
  1276. if (ret == mic->pid)
  1277. reset(mic);
  1278. }
  1279. static void *
  1280. mic_config(void *arg)
  1281. {
  1282. struct mic_info *mic = (struct mic_info *)arg;
  1283. char *state = NULL;
  1284. char pathname[PATH_MAX];
  1285. int fd, ret;
  1286. struct pollfd ufds[1];
  1287. char value[4096];
  1288. snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
  1289. MICSYSFSDIR, mic->name, "state");
  1290. fd = open(pathname, O_RDONLY);
  1291. if (fd < 0) {
  1292. mpsslog("%s: opening file %s failed %s\n",
  1293. mic->name, pathname, strerror(errno));
  1294. goto error;
  1295. }
  1296. do {
  1297. ret = read(fd, value, sizeof(value));
  1298. if (ret < 0) {
  1299. mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
  1300. mic->name, pathname, strerror(errno));
  1301. goto close_error1;
  1302. }
  1303. retry:
  1304. state = readsysfs(mic->name, "state");
  1305. if (!state)
  1306. goto retry;
  1307. mpsslog("%s: %s %d state %s\n",
  1308. mic->name, __func__, __LINE__, state);
  1309. switch (get_mic_state(mic, state)) {
  1310. case MIC_SHUTTING_DOWN:
  1311. mic_handle_shutdown(mic);
  1312. goto close_error;
  1313. case MIC_SUSPENDING:
  1314. mic->boot_on_resume = 1;
  1315. setsysfs(mic->name, "state", "suspend");
  1316. mic_handle_shutdown(mic);
  1317. goto close_error;
  1318. case MIC_OFFLINE:
  1319. if (mic->boot_on_resume) {
  1320. setsysfs(mic->name, "state", "boot");
  1321. mic->boot_on_resume = 0;
  1322. }
  1323. break;
  1324. default:
  1325. break;
  1326. }
  1327. free(state);
  1328. ufds[0].fd = fd;
  1329. ufds[0].events = POLLERR | POLLPRI;
  1330. ret = poll(ufds, 1, -1);
  1331. if (ret < 0) {
  1332. mpsslog("%s: poll failed %s\n",
  1333. mic->name, strerror(errno));
  1334. goto close_error1;
  1335. }
  1336. } while (1);
  1337. close_error:
  1338. free(state);
  1339. close_error1:
  1340. close(fd);
  1341. error:
  1342. init_mic(mic);
  1343. pthread_exit(NULL);
  1344. }
  1345. static void
  1346. set_cmdline(struct mic_info *mic)
  1347. {
  1348. char buffer[PATH_MAX];
  1349. int len;
  1350. len = snprintf(buffer, PATH_MAX,
  1351. "clocksource=tsc highres=off nohz=off ");
  1352. len += snprintf(buffer + len, PATH_MAX,
  1353. "cpufreq_on;corec6_off;pc3_off;pc6_off ");
  1354. len += snprintf(buffer + len, PATH_MAX,
  1355. "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
  1356. mic->id);
  1357. setsysfs(mic->name, "cmdline", buffer);
  1358. mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
  1359. snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
  1360. mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
  1361. }
  1362. static void
  1363. set_log_buf_info(struct mic_info *mic)
  1364. {
  1365. int fd;
  1366. off_t len;
  1367. char system_map[] = "/lib/firmware/mic/System.map";
  1368. char *map, *temp, log_buf[17] = {'\0'};
  1369. fd = open(system_map, O_RDONLY);
  1370. if (fd < 0) {
  1371. mpsslog("%s: Opening System.map failed: %d\n",
  1372. mic->name, errno);
  1373. return;
  1374. }
  1375. len = lseek(fd, 0, SEEK_END);
  1376. if (len < 0) {
  1377. mpsslog("%s: Reading System.map size failed: %d\n",
  1378. mic->name, errno);
  1379. close(fd);
  1380. return;
  1381. }
  1382. map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
  1383. if (map == MAP_FAILED) {
  1384. mpsslog("%s: mmap of System.map failed: %d\n",
  1385. mic->name, errno);
  1386. close(fd);
  1387. return;
  1388. }
  1389. temp = strstr(map, "__log_buf");
  1390. if (!temp) {
  1391. mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
  1392. munmap(map, len);
  1393. close(fd);
  1394. return;
  1395. }
  1396. strncpy(log_buf, temp - 19, 16);
  1397. setsysfs(mic->name, "log_buf_addr", log_buf);
  1398. mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
  1399. temp = strstr(map, "log_buf_len");
  1400. if (!temp) {
  1401. mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
  1402. munmap(map, len);
  1403. close(fd);
  1404. return;
  1405. }
  1406. strncpy(log_buf, temp - 19, 16);
  1407. setsysfs(mic->name, "log_buf_len", log_buf);
  1408. mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
  1409. munmap(map, len);
  1410. close(fd);
  1411. }
  1412. static void init_mic(struct mic_info *mic);
  1413. static void
  1414. change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
  1415. {
  1416. struct mic_info *mic;
  1417. for (mic = mic_list.next; mic != NULL; mic = mic->next)
  1418. mic->mic_virtblk.signaled = 1/* true */;
  1419. }
  1420. static void
  1421. init_mic(struct mic_info *mic)
  1422. {
  1423. struct sigaction ignore = {
  1424. .sa_flags = 0,
  1425. .sa_handler = SIG_IGN
  1426. };
  1427. struct sigaction act = {
  1428. .sa_flags = SA_SIGINFO,
  1429. .sa_sigaction = change_virtblk_backend,
  1430. };
  1431. char buffer[PATH_MAX];
  1432. int err;
  1433. /*
  1434. * Currently, one virtio block device is supported for each MIC card
  1435. * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
  1436. * The signal informs the virtio block backend about a change in the
  1437. * configuration file which specifies the virtio backend file name on
  1438. * the host. Virtio block backend then re-reads the configuration file
  1439. * and switches to the new block device. This signalling mechanism may
  1440. * not be required once multiple virtio block devices are supported by
  1441. * the MIC daemon.
  1442. */
  1443. sigaction(SIGUSR1, &ignore, NULL);
  1444. mic->pid = fork();
  1445. switch (mic->pid) {
  1446. case 0:
  1447. set_log_buf_info(mic);
  1448. set_cmdline(mic);
  1449. add_virtio_device(mic, &virtcons_dev_page.dd);
  1450. add_virtio_device(mic, &virtnet_dev_page.dd);
  1451. err = pthread_create(&mic->mic_console.console_thread, NULL,
  1452. virtio_console, mic);
  1453. if (err)
  1454. mpsslog("%s virtcons pthread_create failed %s\n",
  1455. mic->name, strerror(err));
  1456. err = pthread_create(&mic->mic_net.net_thread, NULL,
  1457. virtio_net, mic);
  1458. if (err)
  1459. mpsslog("%s virtnet pthread_create failed %s\n",
  1460. mic->name, strerror(err));
  1461. err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
  1462. virtio_block, mic);
  1463. if (err)
  1464. mpsslog("%s virtblk pthread_create failed %s\n",
  1465. mic->name, strerror(err));
  1466. sigemptyset(&act.sa_mask);
  1467. err = sigaction(SIGUSR1, &act, NULL);
  1468. if (err)
  1469. mpsslog("%s sigaction SIGUSR1 failed %s\n",
  1470. mic->name, strerror(errno));
  1471. while (1)
  1472. sleep(60);
  1473. case -1:
  1474. mpsslog("fork failed MIC name %s id %d errno %d\n",
  1475. mic->name, mic->id, errno);
  1476. break;
  1477. default:
  1478. if (mic->restart) {
  1479. snprintf(buffer, PATH_MAX, "boot");
  1480. setsysfs(mic->name, "state", buffer);
  1481. mpsslog("%s restarting mic %d\n",
  1482. mic->name, mic->restart);
  1483. mic->restart = 0;
  1484. }
  1485. pthread_create(&mic->config_thread, NULL, mic_config, mic);
  1486. }
  1487. }
  1488. static void
  1489. start_daemon(void)
  1490. {
  1491. struct mic_info *mic;
  1492. for (mic = mic_list.next; mic != NULL; mic = mic->next)
  1493. init_mic(mic);
  1494. while (1)
  1495. sleep(60);
  1496. }
  1497. static int
  1498. init_mic_list(void)
  1499. {
  1500. struct mic_info *mic = &mic_list;
  1501. struct dirent *file;
  1502. DIR *dp;
  1503. int cnt = 0;
  1504. dp = opendir(MICSYSFSDIR);
  1505. if (!dp)
  1506. return 0;
  1507. while ((file = readdir(dp)) != NULL) {
  1508. if (!strncmp(file->d_name, "mic", 3)) {
  1509. mic->next = calloc(1, sizeof(struct mic_info));
  1510. if (mic->next) {
  1511. mic = mic->next;
  1512. mic->id = atoi(&file->d_name[3]);
  1513. mic->name = malloc(strlen(file->d_name) + 16);
  1514. if (mic->name)
  1515. strcpy(mic->name, file->d_name);
  1516. mpsslog("MIC name %s id %d\n", mic->name,
  1517. mic->id);
  1518. cnt++;
  1519. }
  1520. }
  1521. }
  1522. closedir(dp);
  1523. return cnt;
  1524. }
  1525. void
  1526. mpsslog(char *format, ...)
  1527. {
  1528. va_list args;
  1529. char buffer[4096];
  1530. char ts[52], *ts1;
  1531. time_t t;
  1532. if (logfp == NULL)
  1533. return;
  1534. va_start(args, format);
  1535. vsprintf(buffer, format, args);
  1536. va_end(args);
  1537. time(&t);
  1538. ts1 = ctime_r(&t, ts);
  1539. ts1[strlen(ts1) - 1] = '\0';
  1540. fprintf(logfp, "%s: %s", ts1, buffer);
  1541. fflush(logfp);
  1542. }
  1543. int
  1544. main(int argc, char *argv[])
  1545. {
  1546. int cnt;
  1547. pid_t pid;
  1548. myname = argv[0];
  1549. logfp = fopen(LOGFILE_NAME, "a+");
  1550. if (!logfp) {
  1551. fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
  1552. exit(1);
  1553. }
  1554. pid = fork();
  1555. switch (pid) {
  1556. case 0:
  1557. break;
  1558. case -1:
  1559. exit(2);
  1560. default:
  1561. exit(0);
  1562. }
  1563. mpsslog("MIC Daemon start\n");
  1564. cnt = init_mic_list();
  1565. if (cnt == 0) {
  1566. mpsslog("MIC module not loaded\n");
  1567. exit(3);
  1568. }
  1569. mpsslog("MIC found %d devices\n", cnt);
  1570. start_daemon();
  1571. exit(0);
  1572. }