etherm10g.c 25 KB


  1. /*
  2. * myricom 10 Gb ethernet (file server driver)
  3. * © 2007 erik quanstrom, coraid
  4. */
  5. #include "all.h"
  6. #include "io.h"
  7. #include "../ip/ip.h"
  8. #include "etherif.h"
  9. #include "portfns.h"
  10. #include "mem.h"
  /*
   * postfix size multipliers: written after a literal, as in `4 K' or
   * `1 MB'.  they expand without parentheses, so use them only in
   * simple products.
   */
  #undef MB
  #define K * 1024
  #define MB * 1024 K
  /*
   * debug print, gated on the file-scope `debug' flag.  the body is
   * wrapped in do/while(0) so the macro is a single statement; the
   * bare `if(debug) print(...)' form silently captured the `else' of
   * an unbraced if/else written around it.
   */
  #define dprint(...) do{ if(debug) print(__VA_ARGS__); }while(0)
  /* capability-walk tracing, compiled out */
  #define pcicapdbg(...)
  /* allocate n bytes through ialloc with 4 K alignment */
  #define malign(n) ialloc((n), 4 K)
  /* bus address of x; parenthesized so it is safe inside expressions */
  #define PCIWADDR(x) (PADDR(x)+0)
  18. extern ulong upamalloc(ulong, int, int);
  19. #include "etherm10g2k.i"
  20. #include "etherm10g4k.i"
  /* nonzero enables dprint() output; toggled by the "debug" subcommand */
  static int debug;

  /* message strings handed to panic() and friends */
  static char Etimeout[] = "timeout";
  static char Enomem[] = "no memory";
  static char Enonexist[] = "controller lost";
  static char Ebadarg[] = "bad argument";
  /* sizes, limits and offsets into the card's memory window */
  enum {
      Epromsz     = 256,          /* bytes of eprom text kept in Ctlr.eprom */
      Maxslots    = 1024,         /* entries in the completion (Done) ring */
      Align       = 4096,
      Maxmtu      = 9000,
      Noconf      = 0xffffffff,   /* "no response yet" marker in the reply buffer */
      Fwoffset    = 1<<20,        /* 1 MB: where the firmware image is copied */
      Cmdoff      = 0xf80000,     /* command port offset */
      Fwsubmt     = 0xfc0000,     /* firmware submission command port offset */
      Rdmaoff     = 0xfc01c0,     /* rdma command port offset */
  };
  /*
   * firmware command codes, passed as `type' to cmd() and maccmd().
   * the values are positional; do not reorder.
   */
  enum {
      CZero           = 0,
      Creset,
      Cversion,

      /* issue these before Cetherup */
      CSintrqdma,
      CSbigsz,            /* in bytes bigsize = 2^n */
      CSsmallsz,
      CGsendoff,
      CGsmallrxoff,
      CGbigrxoff,
      CGirqackoff,
      CGirqdeassoff,
      CGsendrgsz,
      CGrxrgsz,
      CSintrqsz,          /* 2^n */

      Cetherup,           /* above parameters + mtu/mac addr must be set first. */
      Cetherdn,

      /* below may be issued live */
      CSmtu,
      CGcoaloff,          /* in µs */
      CSstatsrate,        /* in µs */
      CSstatsdma,
      Cpromisc,
      Cnopromisc,
      CSmac,
      Cenablefc,
      Cdisablefc,
      Cdmatest,           /* address in d[0-1], d[2]=length */
      Cenableallmc,
      Cdisableallmc,
      CSjoinmc,
      CSleavemc,
      Cleaveallmc,
      CSstatsdma2,        /* adds (unused) multicast stats */
  };
  71. typedef union {
  72. uint i[2];
  73. uchar c[8];
  74. } Cmd;
  75. typedef struct {
  76. u16int cksum;
  77. u16int len;
  78. } Slot;
  /* bits for Send.flags */
  enum {
      SFsmall     = 1<<0,
      SFfirst     = 1<<1,
      SFalign     = 1<<2,
      SFnotso     = 1<<4,
  };
  85. typedef struct {
  86. u32int high;
  87. u32int low;
  88. u16int hdroff;
  89. u16int len;
  90. uchar pad;
  91. uchar nrdma;
  92. uchar chkoff;
  93. uchar flags;
  94. } Send;
  95. typedef struct {
  96. QLock;
  97. Send *lanai; /* tx ring (cksum+len in lanai memory) */
  98. Send *host; /* tx ring (data in our memory) */
  99. Msgbuf **bring;
  100. // uchar *wcfifo; /* what the heck is a w/c fifo? */
  101. int size; /* of buffers in the z8's memory */
  102. u32int segsz;
  103. uint n; /* rxslots */
  104. uint m; /* mask; rxslots must be a power of two */
  105. uint i; /* number of segments (not frames) queued */
  106. uint cnt; /* number of segments sent by the card */
  107. ulong npkt;
  108. vlong nbytes;
  109. } Tx;
  110. typedef struct {
  111. Lock;
  112. Msgbuf *head;
  113. uint size; /* buffer size of each block */
  114. uint n; /* n free buffers */
  115. uint cnt;
  116. } Bpool;
  117. Bpool smpool = { .size = 128, };
  118. Bpool bgpool = { .size = Maxmtu, };
  119. typedef struct {
  120. Bpool *pool; /* free buffers */
  121. u32int *lanai; /* rx ring; we have no permanent host shadow */
  122. Msgbuf **host; /* called "info" in myricom driver */
  123. // uchar *wcfifo; /* cmd submission fifo */
  124. uint m;
  125. uint n; /* rxslots */
  126. uint i;
  127. uint cnt; /* number of buffers allocated (lifetime) */
  128. uint allocfail;
  129. } Rx;
  130. /* dma mapped. internet network byte order. */
  131. typedef struct {
  132. uchar txcnt[4];
  133. uchar linkstat[4];
  134. uchar dlink[4];
  135. uchar derror[4];
  136. uchar drunt[4];
  137. uchar doverrun[4];
  138. uchar dnosm[4];
  139. uchar dnobg[4];
  140. uchar nrdma[4];
  141. uchar txstopped;
  142. uchar down;
  143. uchar updated;
  144. uchar valid;
  145. } Stats;
  /* values of Ctlr.state, in the order m10gattach steps through them */
  enum {
      Detached    = 0,
      Attached    = 1,
      Runed       = 2,
  };
  151. typedef struct {
  152. Slot *entry;
  153. uintptr busaddr;
  154. uint m;
  155. uint n;
  156. uint i;
  157. } Done;
  158. typedef struct Ctlr Ctlr;
  159. typedef struct Ctlr {
  160. QLock;
  161. int state;
  162. int kprocs;
  163. uintptr port;
  164. Pcidev* pcidev;
  165. Ctlr* next;
  166. int active;
  167. int id; /* do we need this? */
  168. uchar ra[Easize];
  169. int ramsz;
  170. uchar *ram;
  171. u32int *irqack;
  172. u32int *irqdeass;
  173. u32int *coal;
  174. char eprom[Epromsz];
  175. ulong serial; /* unit serial number */
  176. QLock cmdl;
  177. Cmd *cmd; /* address of command return */
  178. uintptr cprt; /* bus address of command */
  179. uintptr boot; /* boot address */
  180. Done done;
  181. Tx tx;
  182. Rx sm;
  183. Rx bg;
  184. Stats *stats;
  185. uintptr statsprt;
  186. Rendez rxrendez;
  187. Rendez txrendez;
  188. int msi;
  189. u32int linkstat;
  190. u32int nrdma;
  191. } Ctlr;
  /* pci capability ids, as matched by pcicap() */
  enum {
      PciCapPMG   = 0x01,     /* power management */
      PciCapAGP   = 0x02,
      PciCapVPD   = 0x03,     /* vital product data */
      PciCapSID   = 0x04,     /* slot id */
      PciCapMSI   = 0x05,
      PciCapCHS   = 0x06,     /* compact pci hot swap */
      PciCapPCIX  = 0x07,
      PciCapHTC   = 0x08,     /* hypertransport irq conf */
      PciCapVND   = 0x09,     /* vendor specific information */
      PciCapHSW   = 0x0C,     /* hot swap */
      PciCapPCIe  = 0x10,
      PciCapMSIX  = 0x11,
  };
  /* pcie extended capability ids, as matched by pciecap() */
  enum {
      PcieAERC    = 1,
      PcieVC      = 2,
      PcieSNC     = 3,
      PciePBC     = 4,
  };
  /* offsets within the AERC extended capability */
  enum {
      AercCCR     = 0x18,     /* control register */
  };
  /* offsets within the pcie capability, and a field mask */
  enum {
      PcieCTL     = 8,        /* register rewritten by setpcie */
      PcieLCR     = 12,       /* register whichfw reads the lane count from */
      PcieMRD     = 0x7000,   /* maximum read size */
  };
  220. int
  221. pcicap(Pcidev *p, int cap)
  222. {
  223. int i, c, off;
  224. pcicapdbg("pcicap: %x:%d\n", p->vid, p->did);
  225. off = 0x34; /* 0x14 for cardbus */
  226. for(i = 48; i--;){
  227. pcicapdbg("\t" "loop %x\n", off);
  228. off = pcicfgr8(p, off);
  229. pcicapdbg("\t" "pcicfgr8 %x\n", off);
  230. if(off < 0x40)
  231. break;
  232. off &= ~3;
  233. c = pcicfgr8(p, off);
  234. pcicapdbg("\t" "pcicfgr8 %x\n", c);
  235. if(c == 0xff)
  236. break;
  237. if(c == cap)
  238. return off;
  239. off++;
  240. }
  241. return 0;
  242. }
  243. /*
  244. * this function doesn't work because pcicgr32 doesn't have access
  245. * to the pcie extended configuration space.
  246. */
  247. int
  248. pciecap(Pcidev *p, int cap)
  249. {
  250. uint off, i;
  251. off = 0x100;
  252. while(((i = pcicfgr32(p, off)) & 0xffff) != cap){
  253. off = i >> 20;
  254. print("pciecap offset = %ud\n", off);
  255. if(off < 0x100 || off >= 4 K - 1)
  256. return 0;
  257. }
  258. print("pciecap found = %ud\n", off);
  259. return off;
  260. }
  261. static int
  262. setpcie(Pcidev *p)
  263. {
  264. int off;
  265. /* set 4k writes */
  266. off = pcicap(p, PciCapPCIe);
  267. if(off < 64)
  268. return -1;
  269. off += PcieCTL;
  270. pcicfgw16(p, off, (pcicfgr16(p, off) & ~PcieMRD) | 5<<12);
  271. return 0;
  272. }
  273. static int
  274. whichfw(Pcidev *p)
  275. {
  276. char *s;
  277. int i, off, lanes, ecrc;
  278. u32int cap;
  279. /* check the number of configured lanes. */
  280. off = pcicap(p, PciCapPCIe);
  281. if(off < 64)
  282. return -1;
  283. off += PcieLCR;
  284. cap = pcicfgr16(p, off);
  285. lanes = (cap>>4)&0x3f;
  286. /* check AERC register. we need it on. */
  287. off = pciecap(p, PcieAERC);
  288. print("%d offset\n", off);
  289. cap = 0;
  290. if(off != 0){
  291. off += AercCCR;
  292. cap = pcicfgr32(p, off);
  293. print("%ud cap\n", cap);
  294. }
  295. ecrc = (cap>>4)&0xf;
  296. /* if we don't like the aerc, kick it here. */
  297. print("m10g %d lanes; ecrc=%d; ", lanes, ecrc);
  298. if(s = getconf("myriforce")){
  299. i = strtoul(s, 0, 0);
  300. if(i != 4 K || i != 2 K)
  301. i = 2 K;
  302. print("fw=%d [forced]\n", i);
  303. return i;
  304. }
  305. if(lanes <= 4){
  306. print("fw = 4096 [lanes]\n");
  307. return 4 K;
  308. }
  309. if(ecrc & 10){
  310. print("fw = 4096 [ecrc set]\n");
  311. return 4K;
  312. }
  313. print("fw = 4096 [default]\n");
  314. return 4 K;
  315. }
  316. static int
  317. parseeprom(Ctlr *c)
  318. {
  319. int i, j, k, l, bits;
  320. char *s;
  321. dprint("m10g eprom:\n");
  322. s = c->eprom;
  323. bits = 3;
  324. for(i = 0; s[i] && i < Epromsz; i++){
  325. l = strlen(s+i);
  326. dprint("\t%s\n", s+i);
  327. if(strncmp(s+i, "MAC=", 4) == 0 && l == 4+12+5){
  328. bits ^= 1;
  329. j = i + 4;
  330. for(k = 0; k < 6; k++)
  331. c->ra[k] = strtoul(s+j+3*k, 0, 16);
  332. } else if(strncmp(s+i, "SN=", 3) == 0){
  333. bits ^= 2;
  334. c->serial = strtoul(s+i+3, 0, 0);
  335. }
  336. i += l;
  337. }
  338. if(bits)
  339. return -1;
  340. return 0;
  341. }
  342. u16int
  343. pbit16(u16int i)
  344. {
  345. u16int j;
  346. uchar *p;
  347. p = (uchar*)&j;
  348. p[1] = i;
  349. p[0] = i>>8;
  350. return j;
  351. }
  352. u16int
  353. gbit16(uchar i[2])
  354. {
  355. u16int j;
  356. j = i[1];
  357. j |= i[0]<<8;
  358. return j;
  359. }
  360. u32int
  361. pbit32(u32int i)
  362. {
  363. u32int j;
  364. uchar *p;
  365. p = (uchar*)&j;
  366. p[3] = i;
  367. p[2] = i>>8;
  368. p[1] = i>>16;
  369. p[0] = i>>24;
  370. return j;
  371. }
  372. static u32int
  373. gbit32(uchar i[4])
  374. {
  375. u32int j;
  376. j = i[3];
  377. j |= i[2]<<8;
  378. j |= i[1]<<16;
  379. j |= i[0]<<24;
  380. return j;
  381. }
  382. static void
  383. prepcmd(uint *cmd, int i)
  384. {
  385. while(i-- > 0)
  386. cmd[i] = pbit32(cmd[i]);
  387. }
  /*
   * the command looks like this (in 32-bit big-endian words)
   * cmd type
   * addr (low)
   * addr (high)
   * pad (used for dma testing)
   * response (high)
   * response (low)
   * 40 bytes = 10 words of pad.
   */
  398. static Rendez cmdr;
  399. static int
  400. return0(void *)
  401. {
  402. return 0;
  403. }
  404. u32int
  405. cmd(Ctlr *c, int type, u32int data)
  406. {
  407. u32int buf[16], i;
  408. Cmd *cmd;
  409. qlock(&c->cmdl);
  410. cmd = c->cmd;
  411. cmd->i[1] = Noconf;
  412. memset(buf, 0, sizeof buf);
  413. buf[0] = type;
  414. buf[1] = data;
  415. buf[5] = c->cprt;
  416. prepcmd(buf, 6);
  417. coherence();
  418. memmove(c->ram+Cmdoff, buf, sizeof buf);
  419. for(i = 0; i < 15; i++){
  420. if(cmd->i[1] != Noconf){
  421. i = gbit32(cmd->c);
  422. qunlock(&c->cmdl);
  423. if(cmd->i[1] != 0)
  424. dprint("[%ux]", i);
  425. return i;
  426. }
  427. delay(1);
  428. }
  429. qunlock(&c->cmdl);
  430. panic("m10g: cmd timeout [%ux %ux] cmd=%d\n",
  431. cmd->i[0], cmd->i[1], type);
  432. return ~0; /* silence! */
  433. }
  434. u32int
  435. maccmd(Ctlr *c, int type, uchar *m)
  436. {
  437. u32int buf[16], i;
  438. Cmd *cmd;
  439. qlock(&c->cmdl);
  440. cmd = c->cmd;
  441. cmd->i[1] = Noconf;
  442. memset(buf, 0, sizeof buf);
  443. buf[0] = type;
  444. buf[1] = m[0]<<24 | m[1]<<16 | m[2]<<8 | m[3];
  445. buf[2] = m[4]<< 8 | m[5];
  446. buf[5] = c->cprt;
  447. prepcmd(buf, 6);
  448. coherence();
  449. memmove(c->ram+Cmdoff, buf, sizeof buf);
  450. for(i = 0; i < 15; i++){
  451. if(cmd->i[1] != Noconf){
  452. i = gbit32(cmd->c);
  453. qunlock(&c->cmdl);
  454. if(cmd->i[1] != 0)
  455. dprint("[%ux]", i);
  456. return i;
  457. }
  458. delay(1);
  459. }
  460. qunlock(&c->cmdl);
  461. print("m10g: maccmd timeout [%ux %ux] cmd=%d\n",
  462. cmd->i[0], cmd->i[1], type);
  463. panic(Etimeout);
  464. return ~0; /* silence! */
  465. }
  /*
   * remove this garbage after testing
   *
   * direction selectors for dmatestcmd, which multiplies them by the
   * transfer length.
   */
  enum {
      DMAwrite    = 1<<0,
      DMAread     = 1<<16,
  };
  471. u32int
  472. dmatestcmd(Ctlr *c, int type, u32int addr, int len)
  473. {
  474. u32int buf[16], i;
  475. memset(buf, 0, sizeof buf);
  476. memset(c->cmd, Noconf, sizeof *c->cmd);
  477. buf[0] = Cdmatest;
  478. buf[1] = addr;
  479. buf[3] = len*type;
  480. buf[5] = c->cprt;
  481. prepcmd(buf, 6);
  482. coherence();
  483. memmove(c->ram+Cmdoff, buf, sizeof buf);
  484. for(i = 0; i < 15; i++){
  485. if(c->cmd->i[1] != Noconf){
  486. i = gbit32(c->cmd->c);
  487. if(i == 0)
  488. return 0;
  489. return i;
  490. }
  491. delay(5);
  492. }
  493. panic(Etimeout);
  494. return ~0; /* silence! */
  495. }
  496. u32int
  497. rdmacmd(Ctlr *c, int on)
  498. {
  499. u32int buf[16], i;
  500. memset(buf, 0, sizeof buf);
  501. c->cmd->i[0] = 0;
  502. coherence();
  503. buf[1] = c->cprt;
  504. buf[2] = Noconf;
  505. buf[4] = c->cprt;
  506. buf[5] = on;
  507. prepcmd(buf, 6);
  508. memmove(c->ram+Rdmaoff, buf, sizeof buf);
  509. for(i = 0; i < 20; i++){
  510. if(c->cmd->i[0] == Noconf)
  511. return gbit32(c->cmd->c);
  512. delay(1);
  513. }
  514. panic(Etimeout);
  515. return ~0; /* silence! */
  516. }
  517. static int
  518. loadfw(Ctlr *c, int *align)
  519. {
  520. uint *f, *s, sz;
  521. int i;
  522. if((*align = whichfw(c->pcidev)) == 4 K){
  523. f = (u32int*)fw4k;
  524. sz = sizeof fw4k;
  525. } else {
  526. f = (u32int*)fw2k;
  527. sz = sizeof fw2k;
  528. }
  529. s = (u32int*)(c->ram + Fwoffset);
  530. for(i = 0; i < sz / 4; i++)
  531. s[i] = f[i];
  532. return sz & ~3;
  533. }
  534. static int
  535. bootfw(Ctlr *c)
  536. {
  537. int i, sz, align;
  538. uint buf[16];
  539. Cmd* cmd;
  540. if((sz = loadfw(c, &align)) == 0)
  541. return 0;
  542. dprint("bootfw %d bytes ... ", sz);
  543. cmd = c->cmd;
  544. memset(buf, 0, sizeof buf);
  545. c->cmd->i[0] = 0;
  546. coherence();
  547. buf[0] = 0; /* upper 32 bits of dma target address */
  548. buf[1] = c->cprt; /* lower */
  549. buf[2] = Noconf; /* writeback */
  550. buf[3] = Fwoffset+8,
  551. buf[4] = sz-8;
  552. buf[5] = 8;
  553. buf[6] = 0;
  554. prepcmd(buf, 7);
  555. coherence();
  556. memmove(c->ram + Fwsubmt, buf, sizeof buf);
  557. for(i = 0; i < 20; i++){
  558. if(cmd->i[0] == Noconf)
  559. break;
  560. delay(1);
  561. }
  562. dprint("[%ux %ux]", gbit32(cmd->c), gbit32(cmd->c+4));
  563. if(i == 20){
  564. print("m10g: cannot load fw\n");
  565. return -1;
  566. }
  567. dprint("\n");
  568. c->tx.segsz = align;
  569. return 0;
  570. }
  571. int
  572. kickthebaby(Pcidev *p, Ctlr *c)
  573. {
  574. /* don't kick the baby! */
  575. u32int code;
  576. pcicfgw8(p, 0x10+c->boot, 0x3);
  577. pcicfgw32(p, 0x18+c->boot, 0xfffffff0);
  578. code = pcicfgr32(p, 0x14+c->boot);
  579. dprint("reboot status = %ux\n", code);
  580. if(code != 0xfffffff0)
  581. return -1;
  582. return 0;
  583. }
  584. typedef struct {
  585. uchar len[4];
  586. uchar type[4];
  587. char version[128];
  588. uchar globals[4];
  589. uchar ramsz[4];
  590. uchar specs[4];
  591. uchar specssz[4];
  592. } Fwhdr;
  /* firmware type tags: four ascii characters packed high byte first */
  enum {
      Tmx     = 0x4d582020,   /* "MX  " */
      Tpcie   = 0x70636965,   /* "pcie" */
      Teth    = 0x45544820,   /* "ETH " */
      Tmcp0   = 0x4d435030,   /* "MCP0" */
  };
  599. char*
  600. fwtype(u32int type)
  601. {
  602. switch(type){
  603. case Tmx:
  604. return "mx";
  605. case Tpcie:
  606. return "PCIe";
  607. case Teth:
  608. return "eth";
  609. case Tmcp0:
  610. return "mcp0";
  611. }
  612. return "*GOK*";
  613. }
  614. int
  615. chkfw(Ctlr *c)
  616. {
  617. uintptr off;
  618. Fwhdr *h;
  619. u32int type;
  620. off = gbit32(c->ram+0x3c);
  621. dprint("firmware %ulx\n", off);
  622. if((off&3) || off + sizeof *h > c->ramsz){
  623. print("!m10g: bad firmware %ulx\n", off);
  624. return -1;
  625. }
  626. h = (Fwhdr*)(c->ram + off);
  627. type = gbit32(h->type);
  628. dprint("\t" "type %s\n", fwtype(type));
  629. dprint("\t" "vers %s\n", h->version);
  630. dprint("\t" "ramsz %ux\n", gbit32(h->ramsz));
  631. if(type != Teth){
  632. print("!m10g: bad card type %s\n", fwtype(type));
  633. return -1;
  634. }
  635. return bootfw(c) || rdmacmd(c, 0);
  636. }
  637. static int
  638. reset(Ether *, Ctlr *c)
  639. {
  640. u32int i, sz;
  641. chkfw(c);
  642. cmd(c, Creset, 0);
  643. cmd(c, CSintrqsz, c->done.n * sizeof *c->done.entry);
  644. cmd(c, CSintrqdma, c->done.busaddr);
  645. c->irqack = (u32int*)(c->ram + cmd(c, CGirqackoff, 0));
  646. /* required only if we're not doing msi? */
  647. c->irqdeass = (u32int*)(c->ram + cmd(c, CGirqdeassoff, 0));
  648. /* this is the driver default, why fiddle with this? */
  649. c->coal = (u32int*)(c->ram + cmd(c, CGcoaloff, 0));
  650. *c->coal = pbit32(25);
  651. dprint("dma stats:\n");
  652. rdmacmd(c, 1);
  653. sz = c->tx.segsz;
  654. i = dmatestcmd(c, DMAread, c->done.busaddr, sz);
  655. print("\t" "read: %ud MB/s\n", ((i>>16)*sz*2)/(i&0xffff));
  656. i = dmatestcmd(c, DMAwrite, c->done.busaddr, sz);
  657. print("\t" "write: %ud MB/s\n", ((i>>16)*sz*2)/(i&0xffff));
  658. i = dmatestcmd(c, DMAwrite|DMAread, c->done.busaddr, sz);
  659. print("\t" "r/w: %ud MB/s\n", ((i>>16)*sz*2*2)/(i&0xffff));
  660. memset(c->done.entry, 0, c->done.n * sizeof *c->done.entry);
  661. maccmd(c, CSmac, c->ra);
  662. // cmd(c, Cnopromisc, 0);
  663. cmd(c, Cenablefc, 0);
  664. cmd(c, CSmtu, Maxmtu);
  665. dprint("CSmtu %d...\n", Maxmtu);
  666. return 0;
  667. }
  668. static int
  669. setmem(Pcidev *p, Ctlr *c)
  670. {
  671. u32int i, raddr;
  672. Done *d;
  673. void *mem;
  674. c->tx.segsz = 2048;
  675. c->ramsz = 2 MB - (2*48 K + 32 K) - 0x100;
  676. if(c->ramsz > p->mem[0].size)
  677. return -1;
  678. raddr = p->mem[0].bar & ~0x0F;
  679. mem = (void*)upamalloc(raddr, p->mem[0].size, 0);
  680. if(mem == nil){
  681. print("m10g: can't map %8.8lux\n", p->mem[0].bar);
  682. return -1;
  683. }
  684. dprint("%ux <- vmap(mem[0].size = %ux)\n", raddr, p->mem[0].size);
  685. c->port = raddr;
  686. c->ram = mem;
  687. c->cmd = malign(sizeof *c->cmd);
  688. c->cprt = PCIWADDR(c->cmd);
  689. d = &c->done;
  690. d->n = Maxslots;
  691. d->m = d->n - 1;
  692. i = d->n * sizeof *d->entry;
  693. d->entry = malign(i);
  694. memset(d->entry, 0, i);
  695. d->busaddr = PCIWADDR(d->entry);
  696. c->stats = malign(sizeof *c->stats);
  697. memset(c->stats, 0, sizeof *c->stats);
  698. c->statsprt = PCIWADDR(c->stats);
  699. memmove(c->eprom, c->ram + c->ramsz - Epromsz, Epromsz-2);
  700. return setpcie(p) || parseeprom(c);
  701. }
  702. static Rx*
  703. whichrx(Ctlr *c, int sz)
  704. {
  705. if(sz <= smpool.size)
  706. return &c->sm;
  707. return &c->bg;
  708. }
  709. static Msgbuf*
  710. m10balloc(Rx* rx)
  711. {
  712. Msgbuf *m;
  713. ilock(rx->pool);
  714. if((m = rx->pool->head) != nil){
  715. rx->pool->head = m->next;
  716. m->next = nil;
  717. rx->pool->n--;
  718. }
  719. iunlock(rx->pool);
  720. return m;
  721. }
  722. static void
  723. smbfree(Msgbuf *m)
  724. {
  725. Bpool *p;
  726. m->data = (uchar*)PGROUND((uintptr)m->xdata);
  727. m->count = 0;
  728. p = &smpool;
  729. ilock(p);
  730. m->next = p->head;
  731. p->head = m;
  732. p->n++;
  733. p->cnt++;
  734. iunlock(p);
  735. }
  736. static void
  737. bgbfree(Msgbuf *m)
  738. {
  739. Bpool *p;
  740. m->data = (uchar*)PGROUND((uintptr)m->xdata);
  741. m->count = 0;
  742. p = &bgpool;
  743. ilock(p);
  744. m->next = p->head;
  745. p->head = m;
  746. p->n++;
  747. p->cnt++;
  748. iunlock(p);
  749. }
  750. static void
  751. replenish(Rx *rx)
  752. {
  753. u32int buf[16], i, idx, e;
  754. Bpool *p;
  755. Msgbuf *m;
  756. p = rx->pool;
  757. if(p->n < 8)
  758. return;
  759. memset(buf, 0, sizeof buf);
  760. e = (rx->i - rx->cnt) & ~7;
  761. e += rx->n;
  762. while(p->n >= 8 && e){
  763. idx = rx->cnt & rx->m;
  764. for(i = 0; i < 8; i++){
  765. m = m10balloc(rx);
  766. buf[i*2+1] = pbit32(PCIWADDR(m->data));
  767. rx->host[idx+i] = m;
  768. }
  769. memmove(rx->lanai + 2*idx, buf, sizeof buf);
  770. coherence();
  771. rx->cnt += 8;
  772. e -= 8;
  773. }
  774. if(e && p->n > 7+1)
  775. print("should panic? pool->n = %d\n", p->n);
  776. }
  777. /*
  778. * future:
  779. * if (c->mtrr >= 0) {
  780. * c->tx.wcfifo = c->ram+0x200000;
  781. * c->sm.wcfifo = c->ram+0x300000;
  782. * c->bg.wcfifo = c->ram+0x340000;
  783. * }
  784. */
  /* smallest power of two that is >= j; 1 for j <= 1 */
  static int
  nextpow(int j)
  {
      int p;

      p = 1;
      while(j > p)
          p <<= 1;
      return p;
  }
  793. static void*
  794. emalign(int sz)
  795. {
  796. void *v;
  797. v = malign(sz);
  798. if(v == nil)
  799. panic(Enomem);
  800. memset(v, 0, sz);
  801. return v;
  802. }
  803. #define Mbeth10gbebg Mbeth4
  804. #define Mbeth10gbesm Mbeth4
  805. static void
  806. open0(Ether *, Ctlr *c)
  807. {
  808. Msgbuf *m;
  809. int i, sz, entries;
  810. entries = cmd(c, CGsendrgsz, 0) / sizeof *c->tx.lanai;
  811. c->tx.lanai = (Send*)(c->ram + cmd(c, CGsendoff, 0));
  812. c->tx.host = emalign(entries * sizeof *c->tx.host);
  813. c->tx.bring = emalign(entries * sizeof *c->tx.bring);
  814. c->tx.n = entries;
  815. c->tx.m = entries-1;
  816. entries = cmd(c, CGrxrgsz, 0)/8;
  817. c->sm.pool = &smpool;
  818. cmd(c, CSsmallsz, c->sm.pool->size);
  819. c->sm.lanai = (u32int*)(c->ram + cmd(c, CGsmallrxoff, 0));
  820. c->sm.n = entries;
  821. c->sm.m = entries-1;
  822. c->sm.host = emalign(entries * sizeof *c->sm.host);
  823. c->bg.pool = &bgpool;
  824. c->bg.pool->size = nextpow(2 + Maxmtu); /* 2-byte alignment pad */
  825. cmd(c, CSbigsz, c->bg.pool->size);
  826. c->bg.lanai = (u32int*)(c->ram + cmd(c, CGbigrxoff, 0));
  827. c->bg.n = entries;
  828. c->bg.m = entries-1;
  829. c->bg.host = emalign(entries * sizeof *c->bg.host);
  830. sz = c->sm.pool->size + BY2PG;
  831. for(i = 0; i < c->sm.n; i++){
  832. m = mballoc(sz, 0, Mbeth10gbesm);
  833. m->free = smbfree;
  834. mbfree(m);
  835. }
  836. /* allocate our own buffers. leak the ones we're given. */
  837. // sz = c->bg.pool->size + BY2PG;
  838. for(i = 0; i < c->bg.n; i++){
  839. // m = mballoc(sz, 0, Mbeth10gbebg);
  840. m = mballoc( 1, 0, Mbeth10gbebg);
  841. m->xdata = emalign(c->bg.pool->size);
  842. m->free = bgbfree;
  843. mbfree(m);
  844. }
  845. cmd(c, CSstatsdma, c->statsprt);
  846. c->linkstat = ~0;
  847. c->nrdma = 15;
  848. cmd(c, Cetherup, 0);
  849. }
  850. static Msgbuf*
  851. nextbuf(Ctlr *c)
  852. {
  853. uint i;
  854. u16int l;
  855. Done *d;
  856. Msgbuf *m;
  857. Rx *rx;
  858. Slot *s;
  859. d = &c->done;
  860. s = d->entry;
  861. i = d->i & d->m;
  862. l = s[i].len;
  863. if(l == 0)
  864. return nil;
  865. *(u32int*)(s+i) = 0;
  866. d->i++;
  867. l = gbit16((uchar*)&l);
  868. rx = whichrx(c, l);
  869. if(rx->i >= rx->cnt){
  870. print("m10g: overrun\n");
  871. return nil;
  872. }
  873. i = rx->i & rx->m;
  874. m = rx->host[i];
  875. rx->host[i] = 0;
  876. if(m == 0){
  877. print("m10g: error rx to no block. memory is hosed.\n");
  878. return nil;
  879. }
  880. rx->i++;
  881. m->data += 2;
  882. m->count = l;
  883. return m;
  884. }
  885. static int
  886. rxcansleep(void *v)
  887. {
  888. Ctlr *c;
  889. Slot *s;
  890. Done *d;
  891. c = v;
  892. d = &c->done;
  893. s = c->done.entry;
  894. if(s[d->i & d->m].len != 0)
  895. return -1;
  896. c->irqack[0] = pbit32(3);
  897. return 0;
  898. }
  899. static void
  900. m10rx(void)
  901. {
  902. Ether *e;
  903. Ctlr *c;
  904. Msgbuf *m;
  905. e = u->arg;
  906. c = e->ctlr;
  907. for(;;){
  908. replenish(&c->sm);
  909. replenish(&c->bg);
  910. sleep(&c->rxrendez, rxcansleep, c);
  911. while(m = nextbuf(c))
  912. etheriq(e, m);
  913. }
  914. }
  915. void
  916. txcleanup(Tx *tx, u32int n)
  917. {
  918. Msgbuf *mb;
  919. uint j, l, m;
  920. if(tx->npkt == n)
  921. return;
  922. l = 0;
  923. m = tx->m;
  924. /* if tx->cnt == tx->i, yet tx->npkt == n-1 we just */
  925. /* caught ourselves and myricom card updating. */
  926. for(;; tx->cnt++){
  927. j = tx->cnt & tx->m;
  928. if(mb = tx->bring[j]){
  929. tx->bring[j] = 0;
  930. tx->nbytes += mb->count;
  931. mbfree(mb);
  932. if(++tx->npkt == n)
  933. return;
  934. }
  935. if(tx->cnt == tx->i)
  936. return;
  937. if(l++ == m){
  938. print("tx ovrun: %ud %uld\n", n, tx->npkt);
  939. return;
  940. }
  941. }
  942. }
  943. static int
  944. txcansleep(void *v)
  945. {
  946. Ctlr *c;
  947. c = v;
  948. if(c->tx.cnt != c->tx.i && c->tx.npkt != gbit32(c->stats->txcnt))
  949. return -1;
  950. return 0;
  951. }
  952. void
  953. txproc(void)
  954. {
  955. Ether *e;
  956. Ctlr *c;
  957. Tx *tx;
  958. e = u->arg;
  959. c = e->ctlr;
  960. tx = &c->tx;
  961. for(;;){
  962. sleep(&c->txrendez, txcansleep, c);
  963. txcleanup(tx, gbit32(c->stats->txcnt));
  964. }
  965. }
  966. void
  967. submittx(Tx *tx, int n)
  968. {
  969. Send *l, *h;
  970. int i0, i, m;
  971. m = tx->m;
  972. i0 = tx->i & m;
  973. l = tx->lanai;
  974. h = tx->host;
  975. for(i = n-1; i >= 0; i--)
  976. memmove(l+(i + i0 & m), h+(i + i0 & m), sizeof *h);
  977. tx->i += n;
  978. // coherence();
  979. }
  980. int
  981. nsegments(Msgbuf *m, int segsz)
  982. {
  983. uintptr bus, end, slen, len;
  984. int i;
  985. bus = PCIWADDR(m->data);
  986. i = 0;
  987. for(len = m->count; len; len -= slen){
  988. end = bus + segsz & ~(segsz-1);
  989. slen = end-bus;
  990. if(slen > len)
  991. slen = len;
  992. bus += slen;
  993. i++;
  994. }
  995. return i;
  996. }
  997. static void
  998. m10gtransmit(Ether *e)
  999. {
  1000. u16int slen;
  1001. u32int i, cnt, rdma, nseg, count, end, bus, len, segsz;
  1002. uchar flags;
  1003. Ctlr *c;
  1004. Msgbuf *m;
  1005. Send *s, *s0, *s0m8;
  1006. Tx *tx;
  1007. c = e->ctlr;
  1008. tx = &c->tx;
  1009. segsz = tx->segsz;
  1010. qlock(tx);
  1011. count = 0;
  1012. s = tx->host + (tx->i & tx->m);
  1013. cnt = tx->cnt;
  1014. s0 = tx->host + (cnt & tx->m);
  1015. s0m8 = tx->host + (cnt - 8 & tx->m);
  1016. i = tx->i;
  1017. for(; s >= s0 || s < s0m8; i += nseg){
  1018. if((m = etheroq(e)) == nil)
  1019. break;
  1020. flags = SFfirst|SFnotso;
  1021. if((len = m->count) < 1520)
  1022. flags |= SFsmall;
  1023. rdma = nseg = nsegments(m, segsz);
  1024. bus = PCIWADDR(m->data);
  1025. for(; len; len -= slen){
  1026. end = bus + segsz & ~(segsz-1);
  1027. slen = end-bus;
  1028. if(slen > len)
  1029. slen = len;
  1030. s->low = pbit32(bus);
  1031. s->len = pbit16(slen);
  1032. s->nrdma = rdma;
  1033. s->flags = flags;
  1034. bus += slen;
  1035. if(++s == tx->host + tx->n)
  1036. s = tx->host;
  1037. count++;
  1038. flags &= ~SFfirst;
  1039. rdma = 1;
  1040. }
  1041. tx->bring[i + nseg - 1 & tx->m] = m;
  1042. submittx(tx, count);
  1043. count = 0;
  1044. cnt = tx->cnt;
  1045. s0 = tx->host + (cnt & tx->m);
  1046. s0m8 = tx->host + (cnt - 8 & tx->m);
  1047. }
  1048. qunlock(tx);
  1049. }
  1050. static void
  1051. checkstats(Ether *, Ctlr *c, Stats *s)
  1052. {
  1053. u32int i;
  1054. if(s->updated == 0)
  1055. return;
  1056. i = gbit32(s->linkstat);
  1057. if(c->linkstat != i)
  1058. if(c->linkstat = i)
  1059. dprint("m10g: link up\n");
  1060. else
  1061. dprint("m10g: link down\n");
  1062. i = gbit32(s->nrdma);
  1063. if(i != c->nrdma){
  1064. dprint("m10g: rdma timeout %d\n", i);
  1065. c->nrdma = i;
  1066. }
  1067. }
  1068. static void
  1069. waitintx(Ctlr *c)
  1070. {
  1071. int i;
  1072. for(i = 0; i < 1024*1024; i++){
  1073. if(c->stats->valid == 0)
  1074. break;
  1075. coherence();
  1076. }
  1077. }
  1078. static void
  1079. m10ginterrupt(Ureg *, void *v)
  1080. {
  1081. Ether *e;
  1082. Ctlr *c;
  1083. e = v;
  1084. c = e->ctlr;
  1085. if(c->state != Runed || c->stats->valid == 0) /* not ready for us? */
  1086. return;
  1087. if(c->stats->valid & 1)
  1088. wakeup(&c->rxrendez);
  1089. if(gbit32(c->stats->txcnt) != c->tx.npkt)
  1090. wakeup(&c->txrendez);
  1091. if(c->msi == 0)
  1092. *c->irqdeass = 0;
  1093. else
  1094. c->stats->valid = 0;
  1095. waitintx(c);
  1096. checkstats(e, c, c->stats);
  1097. c->irqack[1] = pbit32(3);
  1098. }
  1099. static void
  1100. m10gattach(Ether *e)
  1101. {
  1102. Ctlr *c;
  1103. char name[12];
  1104. dprint("m10gattach\n");
  1105. qlock(e->ctlr);
  1106. c = e->ctlr;
  1107. if(c->state != Detached){
  1108. qunlock(c);
  1109. return;
  1110. }
  1111. reset(e, c);
  1112. c->state = Attached;
  1113. open0(e, c);
  1114. if(c->kprocs == 0){
  1115. c->kprocs++;
  1116. snprint(name, sizeof name, "#l%drxproc", e->ctlrno);
  1117. userinit(m10rx, e, name);
  1118. snprint(name, sizeof name, "#l%dtxproc", e->ctlrno);
  1119. userinit(txproc, e, name);
  1120. }
  1121. c->state = Runed;
  1122. qunlock(c);
  1123. }
  1124. static int
  1125. lstcount(Msgbuf *m)
  1126. {
  1127. int i;
  1128. i = 0;
  1129. for(; m; m = m->next)
  1130. i++;
  1131. return i;
  1132. }
  1133. static char ifstatbuf[2 K];
  1134. static void
  1135. cifstat(Ctlr *c, int, char **)
  1136. {
  1137. Stats s;
  1138. /* no point in locking this because this is done via dma. */
  1139. memmove(&s, c->stats, sizeof s);
  1140. snprint(ifstatbuf, sizeof ifstatbuf,
  1141. "txcnt = %ud\n" "linkstat = %ud\n" "dlink = %ud\n"
  1142. "derror = %ud\n" "drunt = %ud\n" "doverrun = %ud\n"
  1143. "dnosm = %ud\n" "dnobg = %ud\n" "nrdma = %ud\n"
  1144. "txstopped = %ud\n" "down = %ud\n" "updated = %ud\n"
  1145. "valid = %ud\n\n"
  1146. "tx pkt = %uld\n" "tx bytes = %lld\n"
  1147. "tx cnt = %ud\n" "tx n = %ud\n" "tx i = %ud\n"
  1148. "sm cnt = %ud\n" "sm i = %ud\n" "sm n = %ud\n" "sm lst = %ud\n"
  1149. "bg cnt = %ud\n" "bg i = %ud\n" "bg n = %ud\n" "bg lst = %ud\n"
  1150. "segsz = %ud\n" "coal = %d\n",
  1151. gbit32(s.txcnt), gbit32(s.linkstat), gbit32(s.dlink),
  1152. gbit32(s.derror), gbit32(s.drunt), gbit32(s.doverrun),
  1153. gbit32(s.dnosm), gbit32(s.dnobg), gbit32(s.nrdma),
  1154. s.txstopped, s.down, s.updated, s.valid,
  1155. c->tx.npkt, c->tx.nbytes,
  1156. c->tx.cnt, c->tx.n, c->tx.i,
  1157. c->sm.cnt, c->sm.i, c->sm.pool->n, lstcount(c->sm.pool->head),
  1158. c->bg.cnt, c->bg.i, c->bg.pool->n, lstcount(c->bg.pool->head),
  1159. c->tx.segsz, gbit32((uchar*)c->coal));
  1160. print("%s", ifstatbuf);
  1161. }
  1162. static void
  1163. cdebug(Ctlr *, int, char**)
  1164. {
  1165. debug ^= 1;
  1166. print("debug %d\n", debug);
  1167. }
  1168. static void
  1169. ccoal(Ctlr *c, int n, char **v)
  1170. {
  1171. int i;
  1172. if(n == 2){
  1173. i = strtoul(*v, 0, 0);
  1174. *c->coal = pbit32(i);
  1175. coherence();
  1176. }
  1177. print("%d\n", gbit32((uchar*)c->coal));
  1178. }
  1179. static void
  1180. chelp(Ctlr*, int, char **)
  1181. {
  1182. print("coal ctlr n -- get/set interrupt colesing delay\n");
  1183. print("debug -- toggle debug (all ctlrs)\n");
  1184. print("ifstat ctlr -- print statistics\n");
  1185. }
  1186. static Ctlr ctlrs[3];
  1187. static int nctlr;
  1188. typedef struct {
  1189. void (*f)(Ctlr *, int, char**);
  1190. char* name;
  1191. int minarg;
  1192. int maxarg;
  1193. } Cmdtab;
  1194. static void
  1195. docmd(Cmdtab *t, int n, int c, char **v)
  1196. {
  1197. int i;
  1198. for(i = 0; i < n; i++)
  1199. if(strcmp(*v, t[n].name) == 0)
  1200. break;
  1201. c--;
  1202. v++;
  1203. t += i;
  1204. if(i == n)
  1205. print("unknown subcommand\n");
  1206. else if(c < t->minarg)
  1207. print("too few args, need %d\n", t->minarg);
  1208. else if(c > t->maxarg)
  1209. print("too many args, max %d\n", t->maxarg);
  1210. else {
  1211. i = 0;
  1212. if(t->minarg > 0){
  1213. i = strtoul(*v++, 0, 0);
  1214. c--;
  1215. }
  1216. if(i < 0 || i == nctlr)
  1217. print("bad controller %d\n", i);
  1218. else
  1219. t->f(ctlrs+i, c, v);
  1220. }
  1221. }
  1222. static Cmdtab ctab[] = {
  1223. cdebug, "debug", 0, 0,
  1224. ccoal, "coal", 1, 2,
  1225. cifstat,"ifstat", 1, 1,
  1226. chelp, "help", 0, 0,
  1227. };
  1228. static void
  1229. m10gctl(int c, char **v)
  1230. {
  1231. docmd(ctab, nelem(ctab), c, v);
  1232. }
  1233. static void
  1234. m10gpci(void)
  1235. {
  1236. Pcidev *p;
  1237. Ctlr *c;
  1238. for(p = 0; p = pcimatch(p, 0x14c1, 0x0008); ){
  1239. c = ctlrs + nctlr;
  1240. memset(c, 0, sizeof *c);
  1241. c->pcidev = p;
  1242. c->id = p->did<<16 | p->vid;
  1243. c->boot = pcicap(p, PciCapVND);
  1244. // kickthebaby(p, c);
  1245. pcisetbme(p);
  1246. if(setmem(p, c) == -1){
  1247. print("m10g failed\n");
  1248. continue;
  1249. }
  1250. if(++nctlr == nelem(ctlrs))
  1251. break;
  1252. }
  1253. }
  1254. int
  1255. m10gpnp(Ether *e)
  1256. {
  1257. Ctlr *c;
  1258. static int once, cmd;
  1259. if(once++ == 0)
  1260. m10gpci();
  1261. for(c = ctlrs; c < ctlrs + nctlr; c++)
  1262. if(c->active)
  1263. continue;
  1264. else if(e->port == 0 || e->port == c->port)
  1265. break;
  1266. if(c == ctlrs + nctlr)
  1267. return -1;
  1268. c->active = 1;
  1269. e->ctlr = c;
  1270. e->port = c->port;
  1271. e->irq = c->pcidev->intl;
  1272. e->tbdf = c->pcidev->tbdf;
  1273. e->mbps = 10000;
  1274. memmove(e->ea, c->ra, Easize);
  1275. e->attach = m10gattach;
  1276. e->transmit = m10gtransmit;
  1277. e->interrupt = m10ginterrupt;
  1278. if(cmd++)
  1279. cmd_install("myrictl", "tweak myri parameters", m10gctl);
  1280. return 0;
  1281. }