virtio_lib.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. /*
  2. * This file is part of the Harvey operating system. It is subject to the
  3. * license terms of the GNU GPL v2 in LICENSE.gpl found in the top-level
  4. * directory of this distribution and at http://www.gnu.org/licenses/gpl-2.0.txt
  5. *
  6. * No part of Harvey operating system, including this file, may be copied,
  7. * modified, propagated, or distributed except according to the terms
  8. * contained in the LICENSE.gpl file.
  9. */
  10. #include "u.h"
  11. #include "../port/lib.h"
  12. #include "mem.h"
  13. #include "dat.h"
  14. #include "fns.h"
  15. #include "io.h"
  16. #include "../port/error.h"
  17. // Include the definitions from VIRTIO spec v1.0
  18. // http://docs.oasis-open.org/virtio/virtio/v1.0/csprd02/listings/virtio_ring.h
  19. #include "virtio_ring.h"
  20. #include "virtio_config.h"
  21. #include "virtio_pci.h"
  22. #include "virtio_lib.h"
#define MAXVQS 8 // maximal number of VQs probed per device (findvqs stops scanning at this count)
static uint32_t nvq; // number of virtio devices recorded by virtiosetup (all device types, not only virtio-9p)
static Vqctl **cvq; // array of device control structure pointers allocated by virtiosetup, length = nvq
  26. // Map device identifiers to descriptive strings to display in IO port
  27. // and interrupt allocation maps.
  28. typedef struct
  29. {
  30. uint16_t did;
  31. char *desc;
  32. } didmap;
  33. static didmap dmtab[] = {
  34. PCI_DEVICE_ID_VIRTIO_NET, "virtio-net",
  35. PCI_DEVICE_ID_VIRTIO_BLOCK, "virtio-block",
  36. PCI_DEVICE_ID_VIRTIO_BALLOON, "virtio-balloon",
  37. PCI_DEVICE_ID_VIRTIO_CONSOLE, "virtio-console",
  38. PCI_DEVICE_ID_VIRTIO_SCSI, "virtio-scsi",
  39. PCI_DEVICE_ID_VIRTIO_RNG, "virtio-rng",
  40. PCI_DEVICE_ID_VIRTIO_9P, "virtio-9p"
  41. };
  42. // Find a device type by its PCI device identifier, used to assign device name in the filesystem,
  43. // and to determine the flavor of read-write operations.
  44. static didmap *
  45. finddev(Vqctl *vc)
  46. {
  47. for(int i = 0; i < nelem(dmtab) ; i++) {
  48. if(vc->pci->did == dmtab[i].did) {
  49. return (dmtab + i);
  50. }
  51. }
  52. return nil;
  53. }
  54. // Map PCI device identifier to a readable name for the filesystem entry.
  55. static char *
  56. mapdev(Vqctl *vc)
  57. {
  58. char *dmap = nil;
  59. didmap *dm = finddev(vc);
  60. if(dm != nil)
  61. dmap = dm->desc;
  62. return dmap;
  63. }
  64. static int
  65. viodone(void *arg)
  66. {
  67. return ((Rock*)arg)->done;
  68. }
  69. static void
  70. vqinterrupt(Virtq *q);
  71. // The interrupt handler entry point. Handler will be dispatched based on
  72. // the bit set in the interrupt status register. If bit 1 is set then a virtqueue
  73. // has to be handled, otherwise the device connfig area was updated. Reflect this
  74. // in the reported device config area modification time.
  75. // If the device's vq IO map is entirely zero, service all existing queues in turn.
  76. // Otherwise only those queues whose bit is set.
  77. static void
  78. vqintr(Ureg *x, void *arg)
  79. {
  80. Vqctl *dev = arg;
  81. uint8_t isr = inb(dev->port + VIRTIO_PCI_ISR);
  82. if(isr & 2) {
  83. dev->dcmtime = seconds();
  84. return;
  85. } else if(isr & 1) {
  86. for(int i = 0; i < dev->nqs; i++) {
  87. vqinterrupt(dev->vqs[i]);
  88. }
  89. }
  90. return;
  91. }
// The interrupt handler part that handles a single virtqueue.
// Walks the used ring from the last position seen (q->lastused) up to the
// index published by the device, and for every completed request wakes up
// the process that is waiting for it in queuedescr.
static void
vqinterrupt(Virtq *q)
{
	int id, m;
	Rock *r;
	Rendez *z;
	// Index mask; relies on the ring size being a power of two (findvqs rejects other sizes).
	m = q->vr.num - 1;
	ilock(&q->l);
	// Positions are compared modulo the ring size.
	while((q->lastused ^ q->vr.used->idx) & m) {
		id = q->vr.used->ring[q->lastused++ & m].id;
		// Requests are tracked by the descriptor index queuedescr stored the Rock under.
		if(r = q->rock[id]){
			q->rock[id] = nil;
			// Fetch the rendezvous before setting done: the Rock lives on the
			// stack of the waiting process and must not be touched afterwards.
			z = r->sleep;
			r->done = 1; /* hands off */
			if(z != nil)
				wakeup(z);
		}
	}
	iunlock(&q->l);
}
  113. // Release a given number of descriptors back to the virtqueue.
  114. void
  115. reldescr(Virtq *q, int n, uint16_t *descr)
  116. {
  117. ilock(&q->l);
  118. for(int i = 0; i < n; i++) {
  119. q2descr(q, descr[i])->next = q->free;
  120. q->free = descr[i];
  121. q->nfree++;
  122. }
  123. iunlock(&q->l);
  124. }
  125. // Obtain a given number of descriptiors from the virtqueue. If the number of free descriptors is low, wait
  126. // until available. Return value: number of descriptors allocated (should be same as requested),
  127. // or -1 (if number of descriptors requested is more than the queue length). The caller should preallocate
  128. // an array of uint16_t values of the same or larger size as the number of descriptors requested;
  129. // this array will be populated. If more than one descriptor is requested, the descriptors allocated
  130. // will be chained in the order of the increasing indice of the array.
  131. int
  132. getdescr(Virtq *q, int n, uint16_t *descr)
  133. {
  134. if(n > q->vr.num)
  135. return -1;
  136. Proc *up = externup();
  137. ilock(&q->l);
  138. while(q->nfree < n) {
  139. iunlock(&q->l);
  140. if(!waserror())
  141. tsleep(&up->sleep, return0, 0, 500);
  142. poperror();
  143. ilock(&q->l);
  144. }
  145. for(int i = 0; i < n; i++) {
  146. int di = q->free;
  147. descr[i] = di;
  148. struct vring_desc *d = &q->vr.desc[di];
  149. q->free = d->next;
  150. q->nfree--;
  151. d->flags = 0;
  152. d->next = 0;
  153. if(i > 0) {
  154. struct vring_desc *pd = &q->vr.desc[descr[i - 1]];
  155. pd->flags = VRING_DESC_F_NEXT;
  156. pd->next = di;
  157. }
  158. }
  159. iunlock(&q->l);
  160. return n;
  161. }
  162. // Place a given number of populated descriptors into the virtqueue. Descriptor indices are
  163. // provided in an array. Descriptors will be queued in the order of increasing array index.
  164. // The process issuing this call will be suspended until the I/O operation on the virtqueue
  165. // completes. It is the calling process responsibility to return the used descriptors
  166. // to the queue.
  167. int
  168. queuedescr(Virtq *q, int n, uint16_t *descr)
  169. {
  170. Proc *up = externup();
  171. int head = descr[0];
  172. uint16_t mask = q->vr.num - 1; // q->num is power of 2 so mask has all bits set
  173. Rock rock; // the sleep-wakeup semaphore on the process stack
  174. rock.done = 0;
  175. rock.sleep = &up->sleep;
  176. ilock(&q->l);
  177. q->rock[head] = &rock;
  178. for(int i = 0; i < n; i++) {
  179. q->vr.avail->ring[q->vr.avail->idx & mask] = descr[i];
  180. q->vr.avail->idx++;
  181. }
  182. coherence();
  183. iunlock(&q->l);
  184. if((q->vr.used->flags & VRING_USED_F_NO_NOTIFY) == 0) {
  185. uint32_t nport = ((Vqctl *)(q->pdev))->port + VIRTIO_PCI_QUEUE_NOTIFY;
  186. outs(nport, q->idx);
  187. }
  188. while(!rock.done) {
  189. sleep(rock.sleep, viodone, &rock);
  190. }
  191. return 0;
  192. }
  193. // Allocate space for a single queue and initialize its descriptor. This is normally called at startup
  194. // for every device's every queue discovered. It may however be necessary to process virtqueue hotplug
  195. // events as with virtio-console, so this procedure can be called independently.
  196. int
  197. vqalloc(Virtq **pq, int qs)
  198. {
  199. *pq = mallocz(sizeof(Virtq) + qs * sizeof(Rock *), 1);
  200. if(*pq == nil)
  201. return -1;
  202. Virtq *q = *pq;
  203. uint64_t vrsize = vring_size(qs, PGSZ);
  204. q->vq = mallocalign(vrsize, PGSZ, 0, 0);
  205. if(q->vq == nil)
  206. return -1;
  207. memset(q->vq, 0, vrsize);
  208. vring_init(&q->vr, qs, q->vq, PGSZ);
  209. q->free = -1;
  210. q->nfree = qs;
  211. for(int i = 0; i < qs; i++) {
  212. q->vr.desc[i].next = q->free;
  213. q->free = i;
  214. }
  215. return 0;
  216. }
  217. // Scan virtqueues for the given device. If the vqs argument is not nil then
  218. // nvq is expected to contain the length of the array vqs points to. In this case
  219. // populate the Virtq structures for each virtqueue found. Otherwise just return
  220. // the number of virtqueues detected. The port argument contains the base port
  221. // for the device being scanned.
  222. // Some devices like console report very large number of virtqueues. Whether it is a bug in QEMU
  223. // or normal behavior we limit the maximum number of virtqueues serviced to 8.
  224. static int
  225. findvqs(uint32_t port, int nvq, Virtq **vqs)
  226. {
  227. int cnt = 0;
  228. while(1) {
  229. outs(port + VIRTIO_PCI_QUEUE_SEL, cnt);
  230. int qs = ins(port + VIRTIO_PCI_QUEUE_NUM);
  231. if(cnt >= MAXVQS || qs == 0 || (qs & (qs-1)) != 0)
  232. break;
  233. if(vqs != nil) {
  234. // Allocate vq's descriptor space, used and available spaces, all page-aligned.
  235. if(vqalloc(&vqs[cnt], qs) < 0) {
  236. print("no memory to allocate a virtqueue\n");
  237. break;
  238. }
  239. coherence();
  240. uint64_t paddr=PADDR(vqs[cnt]->vq);
  241. outl(port + VIRTIO_PCI_QUEUE_PFN, paddr/PGSZ);
  242. }
  243. cnt++;
  244. }
  245. return cnt;
  246. }
  247. // Scan the PCI devices list for possible virtio devices. If the vcs argument
  248. // is not nil then populate the array of control structures, otherwise just return
  249. // the number of devices found. This function is intended to be called twice,
  250. // once with vcs = nil just to count the devices, and the second time to populate
  251. // the control structures, expecting vcs to point to an array of pointers to device
  252. // descriptors of sufficient length.
  253. int
  254. initvdevs(Vqctl **vcs)
  255. {
  256. int cnt = 0;
  257. // TODO: this seems to work as if MSI-X is not enabled (device conf space starts at 20).
  258. // Find out how to deduce msix_enabled from the device.
  259. int msix_enabled = 0;
  260. Pcidev *p;
  261. // Scan the collected PCI devices info, find possible 9p devices
  262. for(p = nil; p = pcimatch(p, PCI_VENDOR_ID_REDHAT_QUMRANET, 0);) {
  263. if(vcs != nil) {
  264. vcs[cnt] = mallocz(sizeof(Vqctl), 1);
  265. if(vcs[cnt] == nil) {
  266. return cnt;
  267. }
  268. // Use the legacy interface
  269. // Allocate the BAR0 I/O space to the driver
  270. Vqctl *vc = vcs[cnt];
  271. vc->pci = p;
  272. vc->port = p->mem[0].bar & ~0x1;
  273. char *dmap = mapdev(vc);
  274. snprint(vc->devname, sizeof(vc->devname), "%s-%d", dmap?dmap:"virtio-pci", cnt);
  275. if(ioalloc(vc->port, p->mem[0].size, 0, vc->devname) < 0) {
  276. free(vc);
  277. vcs[cnt] = nil;
  278. return cnt;
  279. }
  280. // Device reset
  281. outb(vc->port + VIRTIO_PCI_STATUS, 0);
  282. outb(vc->port + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_ACKNOWLEDGE|VIRTIO_CONFIG_S_DRIVER);
  283. int nqs = findvqs(vc->port, 0, nil);
  284. // For each vq allocate and populate its descriptor
  285. if(nqs > 0) {
  286. vc->vqs = mallocz(nqs * sizeof(Virtq *), 1);
  287. vc->nqs = nqs;
  288. findvqs(vc->port, nqs, vc->vqs);
  289. for(int i = 0; i < nqs; i++) {
  290. Virtq *q = vc->vqs[i];
  291. q->idx = i;
  292. q->pdev = vc;
  293. }
  294. }
  295. // Device config space contains data in consecutive 8bit input ports
  296. vc->dcfgoff = VIRTIO_PCI_CONFIG_OFF(msix_enabled);
  297. vc->dcfglen = vc->pci->mem[0].size - vc->dcfgoff;
  298. // Assume that the device config was modified just now
  299. vc->dcmtime = -1;
  300. }
  301. cnt++;
  302. }
  303. return cnt;
  304. }
  305. // Identity finction for device features.
  306. static uint32_t
  307. acceptallfeat(uint32_t feat)
  308. {
  309. return feat;
  310. }
  311. // Negotiate on device features. Read in the features bitmap, alter as needed by the function
  312. // provided, write back to the device. If nil is provided as the function, write back unchanged
  313. // that is, accept whatever is offered (often nothing). Return the feature bits accepted, store
  314. // the same in the device control structure.
  315. uint32_t
  316. vdevfeat(Vqctl *vc, uint32_t(*ffltr)(uint32_t))
  317. {
  318. uint32_t feat = inl(vc->port + VIRTIO_PCI_HOST_FEATURES);
  319. uint32_t rfeat = ffltr?(*ffltr)(feat):acceptallfeat(feat);
  320. rfeat &= feat; // do not introduce new bits, we can only reject existing
  321. vc->feat = rfeat;
  322. outl(vc->port + VIRTIO_PCI_GUEST_FEATURES, rfeat);
  323. return rfeat;
  324. }
  325. // Final device initialization, enable interrupts.
  326. // While initvdevs should be called once for all devices during the OS startup, finalinitdev
  327. // should be called once per device, from the device-specific part of the driver. If the driver
  328. // needs other interrupt handler than the default one, this function should not be called, and
  329. // custom logic should be provided instead.
  330. void
  331. finalinitvdev(Vqctl *vc)
  332. {
  333. intrenable(vc->pci->intl, vqintr, vc, vc->pci->tbdf, vc->devname);
  334. outb(vc->port + VIRTIO_PCI_STATUS, inb(vc->port + VIRTIO_PCI_STATUS) | VIRTIO_CONFIG_S_DRIVER_OK);
  335. }
  336. // Read device configuration area into the given buffer at the given offset in the area.
  337. // Returned is number of bytes actually read. Reading is performed byte by byte, so endianness
  338. // is preserved. The program that reads the configuration area should take care of endianness conversion.
  339. int
  340. readvdevcfg(Vqctl *vc, void *va, int32_t n, int64_t offset)
  341. {
  342. int8_t *a = va;
  343. uint32_t r = offset;
  344. int i;
  345. for(i = 0; i < n; a++, i++) {
  346. if(i + r >= vc->dcfglen)
  347. break;
  348. uint8_t b = inb(vc->port + vc->dcfgoff + i + r);
  349. PBIT8(a, b);
  350. }
  351. return i;
  352. }
  353. // Initialize virtio globally (to be called once during startup).
  354. void
  355. virtiosetup()
  356. {
  357. if(nvq != 0 || cvq != nil)
  358. return; // avoid repeated calls
  359. print("virtio: initializing\n");
  360. nvq = initvdevs(nil);
  361. if(nvq == 0) {
  362. print("virtio: no devices\n");
  363. return; // nothing found
  364. }
  365. cvq = mallocz(nvq * sizeof(Vqctl *), 1);
  366. if(cvq == nil) {
  367. print("virtiosetup: failed to allocate control structures\n");
  368. nvq = 0;
  369. return;
  370. }
  371. initvdevs(cvq);
  372. print("virtio: initialized\n");
  373. }
  374. // Get pointer to a virtio device by its index. Nil is returned if idx is out of range.
  375. Vqctl *
  376. vdevbyidx(uint32_t idx)
  377. {
  378. if(idx >= nvq)
  379. return nil;
  380. return cvq[idx];
  381. }
  382. // Get total number of virtio devices defined at the moment.
  383. uint32_t
  384. getvdevnum(void)
  385. {
  386. return nvq;
  387. }
  388. // Find all devices of given type (e. g. PCI_DEVICE_ID_VIRTIO_NET). An array of sufficient length
  389. // should be provided; it will be filled out with the device references found. Returned is the number
  390. // of devices found.
  391. uint32_t
  392. getvdevsbypciid(int pciid, Vqctl **vqs, uint32_t n)
  393. {
  394. uint32_t j = 0;
  395. if(n < 1 || nvq <= 0)
  396. return 0;
  397. for(int i = 0; i < nvq ; i++) {
  398. if(cvq[i]->pci->did == pciid)
  399. vqs[j++] = cvq[i];
  400. if(j >= n)
  401. break;
  402. }
  403. return j;
  404. }