ether82598.c 23 KB


  1. /*
  2. * intel pci-express 10Gb ethernet driver for 8259[89]
  3. * copyright © 2007, coraid, inc.
  4. * depessimised and made to work on the 82599 at bell labs, 2013.
  5. *
  6. * 82599 requests should ideally not cross a 4KB (page) boundary.
  7. */
  8. #include "u.h"
  9. #include "../port/lib.h"
  10. #include "mem.h"
  11. #include "dat.h"
  12. #include "fns.h"
  13. #include "io.h"
  14. #include "../port/error.h"
  15. #include "../port/netif.h"
  16. #include "etherif.h"
/*
 * advance ring index x by one slot, wrapping with mask m.
 * callers pass m = (ring size - 1), so the ring size must be a power of 2.
 */
#define NEXTPOW2(x, m) (((x)+1) & (m))
/* buffer size, descriptor alignment and ring-size tunables */
enum {
	Rbsz = ETHERMAXTU+32, /* +slop is for vlan headers, crcs, etc. */
	Descalign= 128, /* 599 manual needs 128-byte alignment */
	/* tunable parameters */
	Nrd = 256, /* multiple of 8, power of 2 for NEXTPOW2 */
	Nrb = 1024, /* attach() allocates 2*Nrb receive buffers */
	Ntd = 128, /* multiple of 8, power of 2 for NEXTPOW2 */
	Goslow = 0, /* flag: go slow by throttling intrs, etc. */
};
/*
 * device registers.  each value is the register's byte offset divided
 * by 4, i.e. an index into the u32int register array (Ctlr.reg).
 */
enum {
	/* general */
	Ctrl = 0x00000/4, /* Device Control */
	Status = 0x00008/4, /* Device Status */
	Ctrlext = 0x00018/4, /* Extended Device Control */
	Esdp = 0x00020/4, /* extended sdp control */
	Esodp = 0x00028/4, /* extended od sdp control (i2cctl on 599) */
	Ledctl = 0x00200/4, /* led control */
	Tcptimer = 0x0004c/4, /* tcp timer */
	Ecc = 0x110b0/4, /* errata ecc control magic (pcie intr cause on 599) */
	/* nvm */
	Eec = 0x10010/4, /* eeprom/flash control */
	Eerd = 0x10014/4, /* eeprom read */
	Fla = 0x1001c/4, /* flash access */
	Flop = 0x1013c/4, /* flash opcode */
	Grc = 0x10200/4, /* general rx control */
	/* interrupt */
	Icr = 0x00800/4, /* interrupt cause read */
	Ics = 0x00808/4, /* " set */
	Ims = 0x00880/4, /* " mask read/set (actually enable) */
	Imc = 0x00888/4, /* " mask clear */
	Iac = 0x00810/4, /* " auto clear */
	Iam = 0x00890/4, /* " auto mask enable */
	Itr = 0x00820/4, /* " throttling rate regs (0-19) */
	Ivar = 0x00900/4, /* " vector allocation regs. */
	/* msi interrupt */
	Msixt = 0x0000/4, /* msix table (bar3) */
	Msipba = 0x2000/4, /* msix pending bit array (bar3) */
	Pbacl = 0x11068/4, /* pba clear */
	Gpie = 0x00898/4, /* general purpose int enable */
	/* flow control */
	Pfctop = 0x03008/4, /* priority flow ctl type opcode */
	Fcttv = 0x03200/4, /* " transmit timer value (0-3) */
	Fcrtl = 0x03220/4, /* " rx threshold low (0-7) +8n */
	Fcrth = 0x03260/4, /* " rx threshold high (0-7) +8n */
	Rcrtv = 0x032a0/4, /* " refresh value threshold */
	Tfcs = 0x0ce00/4, /* " tx status */
	/* rx dma */
	Rbal = 0x01000/4, /* rx desc base low (0-63) +0x40n */
	Rbah = 0x01004/4, /* " high */
	Rdlen = 0x01008/4, /* " length */
	Rdh = 0x01010/4, /* " head */
	Rdt = 0x01018/4, /* " tail */
	Rxdctl = 0x01028/4, /* " control */
	Srrctl = 0x02100/4, /* split & replication rx ctl. array */
	Dcarxctl = 0x02200/4, /* rx dca control */
	Rdrxctl = 0x02f00/4, /* rx dma control */
	Rxpbsize = 0x03c00/4, /* rx packet buffer size */
	Rxctl = 0x03000/4, /* rx control */
	Dropen = 0x03d04/4, /* drop enable control (598 only) */
	/* rx */
	Rxcsum = 0x05000/4, /* rx checksum control */
	Rfctl = 0x05008/4, /* rx filter control */
	Mta = 0x05200/4, /* multicast table array (0-127) */
	Ral98 = 0x05400/4, /* rx address low (598) */
	Rah98 = 0x05404/4, /* rx address high (598) */
	Ral99 = 0x0a200/4, /* rx address low array (599) */
	Rah99 = 0x0a204/4, /* rx address high array (599) */
	Psrtype = 0x05480/4, /* packet split rx type. */
	Vfta = 0x0a000/4, /* vlan filter table array. */
	Fctrl = 0x05080/4, /* filter control */
	Vlnctrl = 0x05088/4, /* vlan control */
	Msctctrl = 0x05090/4, /* multicast control */
	Mrqc = 0x05818/4, /* multiple rx queues cmd */
	Vmdctl = 0x0581c/4, /* vmdq control (598 only) */
	Imir = 0x05a80/4, /* immediate irq rx (0-7) (598 only) */
	Imirext = 0x05aa0/4, /* immediate irq rx ext (598 only) */
	Imirvp = 0x05ac0/4, /* immediate irq vlan priority (598 only) */
	Reta = 0x05c00/4, /* redirection table */
	Rssrk = 0x05c80/4, /* rss random key */
	/* tx */
	Tdbal = 0x06000/4, /* tx desc base low +0x40n array */
	Tdbah = 0x06004/4, /* " high */
	Tdlen = 0x06008/4, /* " len */
	Tdh = 0x06010/4, /* " head */
	Tdt = 0x06018/4, /* " tail */
	Txdctl = 0x06028/4, /* " control */
	Tdwbal = 0x06038/4, /* " write-back address low */
	Tdwbah = 0x0603c/4, /* " write-back address high */
	Dtxctl98 = 0x07e00/4, /* tx dma control (598 only) */
	Dtxctl99 = 0x04a80/4, /* tx dma control (599 only) */
	Tdcatxctrl98 = 0x07200/4, /* tx dca register (0-15) (598 only) */
	Tdcatxctrl99 = 0x0600c/4, /* tx dca register (0-127) (599 only) */
	Tipg = 0x0cb00/4, /* tx inter-packet gap (598 only) */
	Txpbsize = 0x0cc00/4, /* tx packet-buffer size (0-15) */
	/* mac */
	Hlreg0 = 0x04240/4, /* highlander control reg 0 */
	Hlreg1 = 0x04244/4, /* highlander control reg 1 (ro) */
	Msca = 0x0425c/4, /* mdi signal cmd & addr */
	Msrwd = 0x04260/4, /* mdi single rw data */
	Mhadd = 0x04268/4, /* mac addr high & max frame */
	Pcss1 = 0x04288/4, /* xgxs status 1 */
	Pcss2 = 0x0428c/4,
	Xpcss = 0x04290/4, /* 10gb-x pcs status */
	Serdesc = 0x04298/4, /* serdes control */
	Macs = 0x0429c/4, /* fifo control & report */
	Autoc = 0x042a0/4, /* autodetect control & status */
	Links = 0x042a4/4, /* link status */
	Links2 = 0x04324/4, /* 599 only */
	Autoc2 = 0x042a8/4,
};
/* bit definitions (and a few shift counts) for the registers above */
enum {
	Factive = 1<<0, /* Ctlr.flag: controller claimed by pnp() */
	Enable = 1<<31, /* enable bit, used with Rah* and Fcrtl/Fcrth */
	/* Ctrl */
	Rst = 1<<26, /* full nic reset */
	/* Txdctl */
	Ten = 1<<25,
	/* Dtxctl99 */
	Te = 1<<0, /* dma tx enable */
	/* Fctrl */
	Bam = 1<<10, /* broadcast accept mode */
	Upe = 1<<9, /* unicast promiscuous */
	Mpe = 1<<8, /* multicast promiscuous */
	/* Rxdctl */
	Pthresh = 0, /* prefetch threshold shift in bits */
	Hthresh = 8, /* host buffer minimum threshold " */
	Wthresh = 16, /* writeback threshold */
	Renable = 1<<25,
	/* Rxctl */
	Rxen = 1<<0,
	Dmbyps = 1<<1, /* descr. monitor bypass (598 only) */
	/* Rdrxctl */
	Rdmt½ = 0, /* 598 */
	Rdmt¼ = 1, /* 598 */
	Rdmt⅛ = 2, /* 598 */
	Crcstrip = 1<<1, /* 599 */
	Rscfrstsize = 037<<17, /* 599; should be zero */
	/* Rxcsum */
	Ippcse = 1<<12, /* ip payload checksum enable */
	/* Eerd */
	EEstart = 1<<0, /* Start Read */
	EEdone = 1<<1, /* Read done */
	/* interrupts */
	Irx0 = 1<<0, /* driver defined */
	Itx0 = 1<<1, /* driver defined */
	Lsc = 1<<20, /* link status change */
	/* Links */
	Lnkup = 1<<30,
	Lnkspd = 1<<29, /* lproc() reports 10000 when set, 1000 when clear */
	/* Hlreg0 */
	Txcrcen = 1<<0, /* add crc during xmit */
	Rxcrcstrip = 1<<1, /* strip crc during recv */
	Jumboen = 1<<2,
	Txpaden = 1<<10, /* pad short frames during xmit */
	/* Autoc */
	Flu = 1<<0, /* force link up */
	Lmsshift = 13, /* link mode select shift */
	Lmsmask = 7,
};
/* forward typedefs for the controller state and the two descriptor formats */
typedef struct Ctlr Ctlr;
typedef struct Rd Rd;
typedef struct Td Td;
/* one hardware statistics counter */
typedef struct {
	uint reg; /* byte offset of the counter; readstats() shifts it right by 2 to index reg[] */
	char *name; /* label printed by ifstat() */
} Stat;
  184. Stat stattab[] = {
  185. 0x4000, "crc error",
  186. 0x4004, "illegal byte",
  187. 0x4008, "short packet",
  188. 0x3fa0, "missed pkt0",
  189. 0x4034, "mac local flt",
  190. 0x4038, "mac rmt flt",
  191. 0x4040, "rx length err",
  192. 0x3f60, "xon tx",
  193. 0xcf60, "xon rx",
  194. 0x3f68, "xoff tx",
  195. 0xcf68, "xoff rx",
  196. 0x405c, "rx 040",
  197. 0x4060, "rx 07f",
  198. 0x4064, "rx 100",
  199. 0x4068, "rx 200",
  200. 0x406c, "rx 3ff",
  201. 0x4070, "rx big",
  202. 0x4074, "rx ok",
  203. 0x4078, "rx bcast",
  204. 0x3fc0, "rx no buf0",
  205. 0x40a4, "rx runt",
  206. 0x40a8, "rx frag",
  207. 0x40ac, "rx ovrsz",
  208. 0x40b0, "rx jab",
  209. 0x40d0, "rx pkt",
  210. 0x40d4, "tx pkt",
  211. 0x40d8, "tx 040",
  212. 0x40dc, "tx 07f",
  213. 0x40e0, "tx 100",
  214. 0x40e4, "tx 200",
  215. 0x40e8, "tx 3ff",
  216. 0x40ec, "tx big",
  217. 0x40f4, "tx bcast",
  218. 0x4120, "xsum err",
  219. };
/* receive descriptor status bits (Rd.status) */
enum {
	Pif = 1<<7, /* past exact filter (sic) */
	Ipcs = 1<<6, /* ip checksum calculated */
	L4cs = 1<<5, /* layer 4 checksum calculated, going by the name (comment used to say layer 2) */
	Tcpcs = 1<<4, /* tcp checksum calculated */
	Vp = 1<<3, /* 802.1q packet matched vet */
	Ixsm = 1<<2, /* ignore checksum */
	Reop = 1<<1, /* end of packet */
	Rdd = 1<<0, /* descriptor done */
};
/*
 * Receive Descriptor.  the ring's address is handed to the nic through
 * Rbal in rxinit(), so the device reads and writes these fields directly:
 * do not reorder or resize them.
 */
struct Rd { /* Receive Descriptor */
	u32int addr[2]; /* buffer address; only addr[0] is set (replenish) */
	ushort length; /* received byte count, consumed by rproc() */
	ushort cksum;
	uchar status; /* Rdd, Reop, ... */
	uchar errors;
	ushort vlan;
};
/* transmit descriptor command (Td.cmd) and status (Td.status) bits */
enum {
	/* Td cmd */
	Rs = 1<<3, /* report status */
	Ic = 1<<2, /* insert checksum */
	Ifcs = 1<<1, /* insert FCS (ethernet crc) */
	Teop = 1<<0, /* end of packet */
	/* Td status */
	Tdd = 1<<0, /* descriptor done */
};
/*
 * Transmit Descriptor.  the ring's address is handed to the nic through
 * Tdbal in txinit(), so the device reads and writes these fields directly:
 * do not reorder or resize them.
 */
struct Td { /* Transmit Descriptor */
	u32int addr[2]; /* buffer address; only addr[0] is set (transmit) */
	ushort length; /* bytes to send */
	uchar cso;
	uchar cmd; /* Rs, Ifcs, Teop, ... */
	uchar status; /* Tdd when the nic is finished with it */
	uchar css;
	ushort vlan;
};
  257. struct Ctlr {
  258. Pcidev *p;
  259. Ether *edev;
  260. int type;
  261. /* virtual */
  262. u32int *reg;
  263. u32int *msix; /* unused */
  264. /* physical */
  265. u32int *physreg;
  266. u32int *physmsix; /* unused */
  267. uchar flag;
  268. int nrd;
  269. int ntd;
  270. int nrb; /* # bufs this Ctlr has in the pool */
  271. uint rbsz;
  272. int procsrunning;
  273. int attached;
  274. Lock slock;
  275. Lock alock; /* attach lock */
  276. QLock tlock;
  277. Rendez lrendez;
  278. Rendez trendez;
  279. Rendez rrendez;
  280. uint im; /* interrupt mask */
  281. uint lim;
  282. uint rim;
  283. uint tim;
  284. Lock imlock;
  285. Rd* rdba; /* receive descriptor base address */
  286. Block** rb; /* receive buffers */
  287. int rdt; /* receive descriptor tail */
  288. int rdfree; /* rx descriptors awaiting packets */
  289. Td* tdba; /* transmit descriptor base address */
  290. int tdh; /* transmit descriptor head */
  291. int tdt; /* transmit descriptor tail */
  292. Block** tb; /* transmit buffers */
  293. uchar ra[Eaddrlen]; /* receive address */
  294. uchar mta[128]; /* multicast table array */
  295. ulong stats[nelem(stattab)];
  296. uint speeds[3];
  297. };
/* controller families, stored in Ctlr.type by scan() */
enum {
	I82598 = 1,
	I82599,
};
static Ctlr *ctlrtab[4]; /* controllers found by scan() */
static int nctlr; /* number of ctlrtab entries in use */
static Lock rblock; /* held around every rbpool access in rballoc/rbfree */
static Block *rbpool; /* free receive buffers, chained through Block.next */
  306. static void
  307. readstats(Ctlr *c)
  308. {
  309. int i;
  310. lock(&c->slock);
  311. for(i = 0; i < nelem(c->stats); i++)
  312. c->stats[i] += c->reg[stattab[i].reg >> 2];
  313. unlock(&c->slock);
  314. }
/*
 * link speed in Mb/s, indexed as lproc() computes it:
 * 0 = link down, 1 = link up with Lnkspd clear, 2 = link up with Lnkspd set.
 */
static int speedtab[] = {
	0,
	1000,
	10000,
};
  320. static long
  321. ifstat(Ether *e, void *a, long n, ulong offset)
  322. {
  323. uint i, *t;
  324. char *s, *p, *q;
  325. Ctlr *c;
  326. c = e->ctlr;
  327. p = s = malloc(READSTR);
  328. if(p == nil)
  329. error(Enomem);
  330. q = p + READSTR;
  331. readstats(c);
  332. for(i = 0; i < nelem(stattab); i++)
  333. if(c->stats[i] > 0)
  334. p = seprint(p, q, "%.10s %uld\n", stattab[i].name,
  335. c->stats[i]);
  336. t = c->speeds;
  337. p = seprint(p, q, "speeds: 0:%d 1000:%d 10000:%d\n", t[0], t[1], t[2]);
  338. p = seprint(p, q, "mtu: min:%d max:%d\n", e->minmtu, e->maxmtu);
  339. seprint(p, q, "rdfree %d rdh %d rdt %d\n", c->rdfree, c->reg[Rdt],
  340. c->reg[Rdh]);
  341. n = readstr(offset, a, n, s);
  342. free(s);
  343. return n;
  344. }
  345. static void
  346. ienable(Ctlr *c, int i)
  347. {
  348. ilock(&c->imlock);
  349. c->im |= i;
  350. c->reg[Ims] = c->im;
  351. iunlock(&c->imlock);
  352. }
  353. static int
  354. lim(void *v)
  355. {
  356. return ((Ctlr*)v)->lim != 0;
  357. }
  358. static void
  359. lproc(void *v)
  360. {
  361. int r, i;
  362. Ctlr *c;
  363. Ether *e;
  364. e = v;
  365. c = e->ctlr;
  366. for (;;) {
  367. r = c->reg[Links];
  368. e->link = (r & Lnkup) != 0;
  369. i = 0;
  370. if(e->link)
  371. i = 1 + ((r & Lnkspd) != 0);
  372. c->speeds[i]++;
  373. e->mbps = speedtab[i];
  374. c->lim = 0;
  375. ienable(c, Lsc);
  376. sleep(&c->lrendez, lim, c);
  377. c->lim = 0;
  378. }
  379. }
/*
 * control-message handler: no messages are supported, so every request
 * raises Ebadarg.  the return statement follows the error() call only to
 * give the function a value.
 */
static long
ctl(Ether *, void *, long)
{
	error(Ebadarg);
	return -1;
}
  386. static Block*
  387. rballoc(void)
  388. {
  389. Block *bp;
  390. ilock(&rblock);
  391. if((bp = rbpool) != nil){
  392. rbpool = bp->next;
  393. bp->next = 0;
  394. _xinc(&bp->ref); /* prevent bp from being freed */
  395. }
  396. iunlock(&rblock);
  397. return bp;
  398. }
  399. void
  400. rbfree(Block *b)
  401. {
  402. b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
  403. b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
  404. ilock(&rblock);
  405. b->next = rbpool;
  406. rbpool = b;
  407. iunlock(&rblock);
  408. }
  409. static int
  410. cleanup(Ctlr *c, int tdh)
  411. {
  412. Block *b;
  413. uint m, n;
  414. m = c->ntd - 1;
  415. while(c->tdba[n = NEXTPOW2(tdh, m)].status & Tdd){
  416. tdh = n;
  417. b = c->tb[tdh];
  418. c->tb[tdh] = 0;
  419. if (b)
  420. freeb(b);
  421. c->tdba[tdh].status = 0;
  422. }
  423. return tdh;
  424. }
/*
 * transmit: move packets queued on e->oq into free transmit descriptors.
 * if another caller holds tlock, just enables the tx interrupt and
 * returns.  otherwise reclaims completed descriptors, fills the ring
 * until it is full or the queue is empty, and, if anything was queued,
 * advances the hardware tail register and enables the tx interrupt.
 */
void
transmit(Ether *e)
{
	uint i, m, tdt, tdh;
	Ctlr *c;
	Block *b;
	Td *t;

	c = e->ctlr;
	if(!canqlock(&c->tlock)){
		ienable(c, Itx0);
		return;
	}
	tdh = c->tdh = cleanup(c, c->tdh);
	tdt = c->tdt;
	m = c->ntd - 1;
	/* i counts the descriptors filled in this call */
	for(i = 0; ; i++){
		if(NEXTPOW2(tdt, m) == tdh){ /* ring full? */
			ienable(c, Itx0);
			break;
		}
		if((b = qget(e->oq)) == nil)
			break;
		assert(c->tdba != nil);
		t = c->tdba + tdt;
		t->addr[0] = PCIWADDR(b->rp);
		t->length = BLEN(b);
		t->cmd = Ifcs | Teop;
		if (!Goslow)
			t->cmd |= Rs;
		/* remember the Block so cleanup() can free it once sent */
		c->tb[tdt] = b;
		tdt = NEXTPOW2(tdt, m);
	}
	if(i) {
		/* descriptors must be written before the tail moves */
		coherence();
		c->reg[Tdt] = c->tdt = tdt; /* make new Tds active */
		coherence();
		ienable(c, Itx0);
	}
	qunlock(&c->tlock);
}
  465. static int
  466. tim(void *c)
  467. {
  468. return ((Ctlr*)c)->tim != 0;
  469. }
  470. static void
  471. tproc(void *v)
  472. {
  473. Ctlr *c;
  474. Ether *e;
  475. e = v;
  476. c = e->ctlr;
  477. for (;;) {
  478. sleep(&c->trendez, tim, c); /* transmit interrupt kicks us */
  479. c->tim = 0;
  480. transmit(e);
  481. }
  482. }
/*
 * rxinit: (re)initialise the receive side.
 * disables receive, frees any Blocks still attached to the ring,
 * programs filtering, checksum and frame-size options, points the nic
 * at the descriptor ring with head = tail = 0, then enables the queue
 * and finally the receiver.  spins until the nic reports the queue
 * enabled.
 */
static void
rxinit(Ctlr *c)
{
	int i, is598;
	Block *b;

	c->reg[Rxctl] &= ~Rxen;
	c->reg[Rxdctl] = 0;
	/* release buffers left in the ring by a previous attach */
	for(i = 0; i < c->nrd; i++){
		b = c->rb[i];
		c->rb[i] = 0;
		if(b)
			freeb(b);
	}
	c->rdfree = 0;
	coherence();
	/* accept broadcasts; no unicast or multicast promiscuity */
	c->reg[Fctrl] |= Bam;
	c->reg[Fctrl] &= ~(Upe | Mpe);
	/* intel gets some csums wrong (e.g., errata 44) */
	c->reg[Rxcsum] &= ~Ippcse;
	c->reg[Hlreg0] &= ~Jumboen; /* jumbos are a bad idea */
	c->reg[Hlreg0] |= Txcrcen | Rxcrcstrip | Txpaden;
	/* buffer size, rounded up to whole KB */
	c->reg[Srrctl] = (c->rbsz + 1024 - 1) / 1024;
	c->reg[Mhadd] = c->rbsz << 16;
	c->reg[Rbal] = PCIWADDR(c->rdba);
	c->reg[Rbah] = 0;
	c->reg[Rdlen] = c->nrd*sizeof(Rd); /* must be multiple of 128 */
	c->reg[Rdh] = 0;
	c->reg[Rdt] = c->rdt = 0;
	coherence();
	is598 = (c->type == I82598);
	if (is598)
		c->reg[Rdrxctl] = Rdmt¼;
	else {
		c->reg[Rdrxctl] |= Crcstrip;
		c->reg[Rdrxctl] &= ~Rscfrstsize;
	}
	if (Goslow && is598)
		c->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
	else
		c->reg[Rxdctl] = Renable;
	coherence();
	/* no timeout: waits for as long as the nic takes to set Renable */
	while (!(c->reg[Rxdctl] & Renable))
		;
	c->reg[Rxctl] |= Rxen | (c->type == I82598? Dmbyps: 0);
}
/*
 * replenish: give the nic fresh receive buffers.
 * starting at the driver's tail, fills descriptors with buffers from the
 * pool until the slot after the tail would be rdh (one slot is always
 * left empty) or the pool runs dry, then publishes the new tail to the
 * nic if any descriptor was filled.
 */
static void
replenish(Ctlr *c, uint rdh)
{
	int rdt, m, i;
	Block *b;
	Rd *r;

	m = c->nrd - 1;
	i = 0; /* descriptors filled in this call */
	for(rdt = c->rdt; NEXTPOW2(rdt, m) != rdh; rdt = NEXTPOW2(rdt, m)){
		r = c->rdba + rdt;
		if((b = rballoc()) == nil){
			print("82598: no buffers\n");
			break;
		}
		c->rb[rdt] = b;
		r->addr[0] = PCIWADDR(b->rp);
		r->status = 0;
		c->rdfree++;
		i++;
	}
	if(i) {
		/* descriptors must be written before the tail moves */
		coherence();
		c->reg[Rdt] = c->rdt = rdt; /* hand back recycled rdescs */
		coherence();
	}
}
  554. static int
  555. rim(void *v)
  556. {
  557. return ((Ctlr*)v)->rim != 0;
  558. }
/*
 * receive kproc: keep the ring stocked with buffers, sleep until
 * interrupt() flags a receive interrupt, then pass every completed
 * descriptor's Block to etheriq().  rdh is the driver's own head index;
 * it starts at 0 to match rxinit().  never returns.
 */
void
rproc(void *v)
{
	uint m, rdh;
	Block *b;
	Ctlr *c;
	Ether *e;
	Rd *r;

	e = v;
	c = e->ctlr;
	m = c->nrd - 1;
	for (rdh = 0; ; ) {
		replenish(c, rdh);
		ienable(c, Irx0);
		sleep(&c->rrendez, rim, c);
		for (;;) {
			c->rim = 0;
			r = c->rdba + rdh;
			if(!(r->status & Rdd))
				break; /* wait for pkts to arrive */
			b = c->rb[rdh];
			c->rb[rdh] = 0;
			if (r->length > ETHERMAXTU)
				print("82598: got jumbo of %d bytes\n", r->length);
			b->wp += r->length;
			b->lim = b->wp; /* lie like a dog */
			// r->status = 0;
			etheriq(e, b, 1);
			c->rdfree--;
			rdh = NEXTPOW2(rdh, m);
			/* top the ring up once 16 or more descriptors are without buffers */
			if (c->rdfree <= c->nrd - 16)
				replenish(c, rdh);
		}
	}
}
  594. static void
  595. promiscuous(void *a, int on)
  596. {
  597. Ctlr *c;
  598. Ether *e;
  599. e = a;
  600. c = e->ctlr;
  601. if(on)
  602. c->reg[Fctrl] |= Upe | Mpe;
  603. else
  604. c->reg[Fctrl] &= ~(Upe | Mpe);
  605. }
  606. static void
  607. multicast(void *a, uchar *ea, int on)
  608. {
  609. int b, i;
  610. Ctlr *c;
  611. Ether *e;
  612. e = a;
  613. c = e->ctlr;
  614. /*
  615. * multiple ether addresses can hash to the same filter bit,
  616. * so it's never safe to clear a filter bit.
  617. * if we want to clear filter bits, we need to keep track of
  618. * all the multicast addresses in use, clear all the filter bits,
  619. * then set the ones corresponding to in-use addresses.
  620. */
  621. i = ea[5] >> 1;
  622. b = (ea[5]&1)<<4 | ea[4]>>4;
  623. b = 1 << b;
  624. if(on)
  625. c->mta[i] |= b;
  626. // else
  627. // c->mta[i] &= ~b;
  628. c->reg[Mta+i] = c->mta[i];
  629. }
  630. static void
  631. freemem(Ctlr *c)
  632. {
  633. Block *b;
  634. while(b = rballoc()){
  635. b->free = 0;
  636. freeb(b);
  637. }
  638. free(c->rdba);
  639. c->rdba = nil;
  640. free(c->tdba);
  641. c->tdba = nil;
  642. free(c->rb);
  643. c->rb = nil;
  644. free(c->tb);
  645. c->tb = nil;
  646. }
  647. static int
  648. detach(Ctlr *c)
  649. {
  650. int i, is598;
  651. c->reg[Imc] = ~0;
  652. c->reg[Ctrl] |= Rst;
  653. for(i = 0; i < 100; i++){
  654. delay(1);
  655. if((c->reg[Ctrl] & Rst) == 0)
  656. break;
  657. }
  658. if (i >= 100)
  659. return -1;
  660. is598 = (c->type == I82598);
  661. if (is598) { /* errata */
  662. delay(50);
  663. c->reg[Ecc] &= ~(1<<21 | 1<<18 | 1<<9 | 1<<6);
  664. }
  665. /* not cleared by reset; kill it manually. */
  666. for(i = 1; i < 16; i++)
  667. c->reg[is598? Rah98: Rah99] &= ~Enable;
  668. for(i = 0; i < 128; i++)
  669. c->reg[Mta + i] = 0;
  670. for(i = 1; i < (is598? 640: 128); i++)
  671. c->reg[Vfta + i] = 0;
  672. // freemem(c); // TODO
  673. c->attached = 0;
  674. return 0;
  675. }
  676. static void
  677. shutdown(Ether *e)
  678. {
  679. detach(e->ctlr);
  680. // freemem(e->ctlr);
  681. }
  682. /* ≤ 20ms */
  683. static ushort
  684. eeread(Ctlr *c, int i)
  685. {
  686. c->reg[Eerd] = EEstart | i<<2;
  687. while((c->reg[Eerd] & EEdone) == 0)
  688. ;
  689. return c->reg[Eerd] >> 16;
  690. }
  691. static int
  692. eeload(Ctlr *c)
  693. {
  694. ushort u, v, p, l, i, j;
  695. if((eeread(c, 0) & 0xc0) != 0x40)
  696. return -1;
  697. u = 0;
  698. for(i = 0; i < 0x40; i++)
  699. u += eeread(c, i);
  700. for(i = 3; i < 0xf; i++){
  701. p = eeread(c, i);
  702. l = eeread(c, p++);
  703. if((int)p + l + 1 > 0xffff)
  704. continue;
  705. for(j = p; j < p + l; j++)
  706. u += eeread(c, j);
  707. }
  708. if(u != 0xbaba)
  709. return -1;
  710. if(c->reg[Status] & (1<<3))
  711. u = eeread(c, 10);
  712. else
  713. u = eeread(c, 9);
  714. u++;
  715. for(i = 0; i < Eaddrlen;){
  716. v = eeread(c, u + i/2);
  717. c->ra[i++] = v;
  718. c->ra[i++] = v>>8;
  719. }
  720. c->ra[5] += (c->reg[Status] & 0xc) >> 2;
  721. return 0;
  722. }
/*
 * reset: bring the nic to a known state.
 * resets it via detach(), loads the mac address from the eeprom and
 * installs it as receive address 0, zeroes the statistics, then sets up
 * flow control, interrupt vector mapping and interrupt throttling.
 * returns 0 on success, -1 if the reset times out or the eeprom is bad.
 */
static int
reset(Ctlr *c)
{
	int i, is598;
	uchar *p;

	if(detach(c)){
		print("82598: reset timeout\n");
		return -1;
	}
	if(eeload(c)){
		print("82598: eeprom failure\n");
		return -1;
	}
	p = c->ra;
	is598 = (c->type == I82598);
	c->reg[is598? Ral98: Ral99] = p[3]<<24 | p[2]<<16 | p[1]<<8 | p[0];
	c->reg[is598? Rah98: Rah99] = p[5]<<8 | p[4] | Enable;
	/* read the counters once, then discard what was accumulated */
	readstats(c);
	for(i = 0; i<nelem(c->stats); i++)
		c->stats[i] = 0;
	c->reg[Ctrlext] |= 1 << 16; /* required by errata (spec change 4) */
	if (Goslow) {
		/* make some guesses for flow control */
		c->reg[Fcrtl] = 0x10000 | Enable;
		c->reg[Fcrth] = 0x40000 | Enable;
		c->reg[Rcrtv] = 0x6000;
	} else
		c->reg[Fcrtl] = c->reg[Fcrth] = c->reg[Rcrtv] = 0;
	/* configure interrupt mapping (don't ask) */
	c->reg[Ivar+0] = 0 | 1<<7;
	c->reg[Ivar+64/4] = 1 | 1<<7;
	// c->reg[Ivar+97/4] = (2 | 1<<7) << (8*(97%4));
	if (Goslow) {
		/* interrupt throttling goes here. */
		for(i = Itr; i < Itr + 20; i++)
			c->reg[i] = 128; /* ¼µs intervals */
		c->reg[Itr + Itx0] = 256;
	} else { /* don't throttle */
		for(i = Itr; i < Itr + 20; i++)
			c->reg[i] = 0; /* ¼µs intervals */
		c->reg[Itr + Itx0] = 0;
	}
	return 0;
}
/*
 * txinit: (re)initialise the transmit side.
 * programs the queue thresholds, frees Blocks left in the ring, zeroes
 * the descriptors, points the nic at the ring with head = tail = 0,
 * then enables tx dma (82599 only) and the queue.  spins until the nic
 * reports the queue enabled.
 */
static void
txinit(Ctlr *c)
{
	Block *b;
	int i;

	if (Goslow)
		c->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
	else
		c->reg[Txdctl] = 0;
	if (c->type == I82599)
		c->reg[Dtxctl99] = 0;
	coherence();
	/* release buffers left in the ring by a previous attach */
	for(i = 0; i < c->ntd; i++){
		b = c->tb[i];
		c->tb[i] = 0;
		if(b)
			freeb(b);
	}
	assert(c->tdba != nil);
	memset(c->tdba, 0, c->ntd * sizeof(Td));
	c->reg[Tdbal] = PCIWADDR(c->tdba);
	c->reg[Tdbah] = 0;
	c->reg[Tdlen] = c->ntd*sizeof(Td); /* must be multiple of 128 */
	c->reg[Tdh] = 0;
	/* cleanup() examines the slot after tdh, so start one before slot 0 */
	c->tdh = c->ntd - 1;
	c->reg[Tdt] = c->tdt = 0;
	coherence();
	if (c->type == I82599)
		c->reg[Dtxctl99] |= Te;
	coherence();
	c->reg[Txdctl] |= Ten;
	coherence();
	/* no timeout: waits for as long as the nic takes to set Ten */
	while (!(c->reg[Txdctl] & Ten))
		;
}
/*
 * attach: make the interface ready for use.
 * on the first call allocates the descriptor rings, the Block pointer
 * arrays and 2*Nrb receive buffers; then, unless already attached,
 * initialises both rings and starts the link, receive and transmit
 * kprocs (once only).  runs under c->alock; if an error is raised the
 * lock is dropped, everything allocated is freed and the error is
 * passed on.
 */
static void
attach(Ether *e)
{
	Block *b;
	Ctlr *c;
	char buf[KNAMELEN];

	c = e->ctlr;
	c->edev = e; /* point back to Ether* */
	lock(&c->alock);
	if(waserror()){
		unlock(&c->alock);
		freemem(c);
		nexterror();
	}
	if(c->rdba == nil) {
		c->nrd = Nrd;
		c->ntd = Ntd;
		c->rdba = mallocalign(c->nrd * sizeof *c->rdba, Descalign, 0, 0);
		c->tdba = mallocalign(c->ntd * sizeof *c->tdba, Descalign, 0, 0);
		c->rb = malloc(c->nrd * sizeof(Block *));
		c->tb = malloc(c->ntd * sizeof(Block *));
		if (c->rdba == nil || c->tdba == nil ||
		    c->rb == nil || c->tb == nil)
			error(Enomem);
		for(c->nrb = 0; c->nrb < 2*Nrb; c->nrb++){
			b = allocb(c->rbsz + BY2PG); /* see rbfree() */
			if(b == nil)
				error(Enomem);
			/* with rbfree installed, freeing the Block is meant to land it in the pool */
			b->free = rbfree;
			freeb(b);
		}
	}
	if (!c->attached) {
		rxinit(c);
		txinit(c);
		if (!c->procsrunning) {
			snprint(buf, sizeof buf, "#l%dl", e->ctlrno);
			kproc(buf, lproc, e);
			snprint(buf, sizeof buf, "#l%dr", e->ctlrno);
			kproc(buf, rproc, e);
			snprint(buf, sizeof buf, "#l%dt", e->ctlrno);
			kproc(buf, tproc, e);
			c->procsrunning = 1;
		}
		c->attached = 1;
	}
	unlock(&c->alock);
	poperror();
}
/*
 * interrupt handler.  masks all interrupts, then for each enabled cause
 * found in Icr sets the matching flag (rim, tim, lim) and wakes the
 * kproc that handles it.  each cause handed to a kproc is removed from
 * the mask; the kproc turns it back on with ienable() when it is ready
 * for more.
 */
static void
interrupt(Ureg*, void *v)
{
	int icr, im;
	Ctlr *c;
	Ether *e;

	e = v;
	c = e->ctlr;
	ilock(&c->imlock);
	c->reg[Imc] = ~0; /* disable all intrs */
	im = c->im;
	/* re-read Icr until no enabled cause is pending */
	while((icr = c->reg[Icr] & c->im) != 0){
		if(icr & Irx0){
			im &= ~Irx0;
			c->rim = Irx0;
			wakeup(&c->rrendez);
		}
		if(icr & Itx0){
			im &= ~Itx0;
			c->tim = Itx0;
			wakeup(&c->trendez);
		}
		if(icr & Lsc){
			im &= ~Lsc;
			c->lim = Lsc;
			wakeup(&c->lrendez);
		}
	}
	c->reg[Ims] = c->im = im; /* enable only intrs we didn't service */
	iunlock(&c->imlock);
}
/*
 * scan: find every supported intel 8259[89] on the pci bus.
 * for each one, maps the register bar (0) and the msi-x bar (3 on the
 * 82598, 4 on the 82599), allocates a Ctlr, resets the nic, turns on
 * bus mastering and records the Ctlr in ctlrtab.  a device that cannot
 * be mapped or reset is skipped after undoing its mappings.  stops
 * early once ctlrtab is full.
 */
static void
scan(void)
{
	int pciregs, pcimsix, type;
	ulong io, iomsi;
	void *mem, *memmsi;
	Ctlr *c;
	Pcidev *p;

	p = 0;
	while(p = pcimatch(p, Vintel, 0)){
		switch(p->did){
		case 0x10b6: /* 82598 backplane */
		case 0x10c6: /* 82598 af dual port */
		case 0x10c7: /* 82598 af single port */
		case 0x10dd: /* 82598 at cx4 */
		case 0x10ec: /* 82598 at cx4 dual port */
			pcimsix = 3;
			type = I82598;
			break;
		case 0x10f7: /* 82599 kx/kx4 */
		case 0x10f8: /* 82599 kx/kx4/kx */
		case 0x10f9: /* 82599 cx4 */
		case 0x10fb: /* 82599 sfi/sfp+ */
		case 0x10fc: /* 82599 xaui/bx4 */
		case 0x1557: /* 82599 single-port sfi */
			pcimsix = 4;
			type = I82599;
			break;
		default:
			continue;
		}
		pciregs = 0;
		if(nctlr == nelem(ctlrtab)){
			print("i82598: too many controllers\n");
			return;
		}
		/* the low 4 bits of a bar are flags, not address */
		io = p->mem[pciregs].bar & ~0xf;
		mem = vmap(io, p->mem[pciregs].size);
		if(mem == nil){
			print("i82598: can't map regs %#p\n",
				p->mem[pciregs].bar);
			continue;
		}
		iomsi = p->mem[pcimsix].bar & ~0xf;
		memmsi = vmap(iomsi, p->mem[pcimsix].size);
		if(memmsi == nil){
			print("i82598: can't map msi-x regs %#p\n",
				p->mem[pcimsix].bar);
			vunmap(mem, p->mem[pciregs].size);
			continue;
		}
		c = malloc(sizeof *c);
		if(c == nil) {
			vunmap(mem, p->mem[pciregs].size);
			vunmap(memmsi, p->mem[pcimsix].size);
			error(Enomem);
		}
		c->p = p;
		c->type = type;
		c->physreg = (u32int*)io;
		c->physmsix = (u32int*)iomsi;
		c->reg = (u32int*)mem;
		c->msix = (u32int*)memmsi; /* unused */
		c->rbsz = Rbsz;
		if(reset(c)){
			print("i82598: can't reset\n");
			free(c);
			vunmap(mem, p->mem[pciregs].size);
			vunmap(memmsi, p->mem[pcimsix].size);
			continue;
		}
		pcisetbme(p);
		ctlrtab[nctlr++] = c;
	}
}
  957. static int
  958. pnp(Ether *e)
  959. {
  960. int i;
  961. Ctlr *c = nil;
  962. if(nctlr == 0)
  963. scan();
  964. for(i = 0; i < nctlr; i++){
  965. c = ctlrtab[i];
  966. if(c == nil || c->flag & Factive)
  967. continue;
  968. if(e->port == 0 || e->port == (ulong)c->reg)
  969. break;
  970. }
  971. if (i >= nctlr)
  972. return -1;
  973. c->flag |= Factive;
  974. e->ctlr = c;
  975. e->port = (uintptr)c->physreg;
  976. e->irq = c->p->intl;
  977. e->tbdf = c->p->tbdf;
  978. e->mbps = 10000;
  979. e->maxmtu = ETHERMAXTU;
  980. memmove(e->ea, c->ra, Eaddrlen);
  981. e->arg = e;
  982. e->attach = attach;
  983. e->ctl = ctl;
  984. e->ifstat = ifstat;
  985. e->interrupt = interrupt;
  986. e->multicast = multicast;
  987. e->promiscuous = promiscuous;
  988. e->shutdown = shutdown;
  989. e->transmit = transmit;
  990. return 0;
  991. }
/* register this driver under the name "i82598", with pnp() as its probe routine */
void
ether82598link(void)
{
	addethercard("i82598", pnp);
}