sysproc.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065
  1. #include "u.h"
  2. #include "tos.h"
  3. #include "../port/lib.h"
  4. #include "mem.h"
  5. #include "dat.h"
  6. #include "fns.h"
  7. #include "../port/error.h"
  8. #include "edf.h"
  9. #include <a.out.h>
  /* Forward declaration: shargs() is defined further down and used by sysexec(). */
  int shargs(char *s, int n, char **ap);

  /* Defined outside this file; checkpagerefs() is the one called by sysr1(). */
  extern void checkpages(void);
  extern void checkpagerefs(void);
  13. long
  14. sysr1(ulong*)
  15. {
  16. checkpagerefs();
  17. return 0;
  18. }
  19. long
  20. sysrfork(ulong *arg)
  21. {
  22. Proc *p;
  23. int n, i;
  24. Fgrp *ofg;
  25. Pgrp *opg;
  26. Rgrp *org;
  27. Egrp *oeg;
  28. ulong pid, flag;
  29. Mach *wm;
  30. flag = arg[0];
  31. /* Check flags before we commit */
  32. if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
  33. error(Ebadarg);
  34. if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
  35. error(Ebadarg);
  36. if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
  37. error(Ebadarg);
  38. if((flag&RFPROC) == 0) {
  39. if(flag & (RFMEM|RFNOWAIT))
  40. error(Ebadarg);
  41. if(flag & (RFFDG|RFCFDG)) {
  42. ofg = up->fgrp;
  43. if(flag & RFFDG)
  44. up->fgrp = dupfgrp(ofg);
  45. else
  46. up->fgrp = dupfgrp(nil);
  47. closefgrp(ofg);
  48. }
  49. if(flag & (RFNAMEG|RFCNAMEG)) {
  50. opg = up->pgrp;
  51. up->pgrp = newpgrp();
  52. if(flag & RFNAMEG)
  53. pgrpcpy(up->pgrp, opg);
  54. /* inherit noattach */
  55. up->pgrp->noattach = opg->noattach;
  56. closepgrp(opg);
  57. }
  58. if(flag & RFNOMNT)
  59. up->pgrp->noattach = 1;
  60. if(flag & RFREND) {
  61. org = up->rgrp;
  62. up->rgrp = newrgrp();
  63. closergrp(org);
  64. }
  65. if(flag & (RFENVG|RFCENVG)) {
  66. oeg = up->egrp;
  67. up->egrp = smalloc(sizeof(Egrp));
  68. up->egrp->ref = 1;
  69. if(flag & RFENVG)
  70. envcpy(up->egrp, oeg);
  71. closeegrp(oeg);
  72. }
  73. if(flag & RFNOTEG)
  74. up->noteid = incref(&noteidalloc);
  75. return 0;
  76. }
  77. p = newproc();
  78. p->fpsave = up->fpsave;
  79. p->scallnr = up->scallnr;
  80. p->s = up->s;
  81. p->nerrlab = 0;
  82. p->slash = up->slash;
  83. p->dot = up->dot;
  84. incref(p->dot);
  85. memmove(p->note, up->note, sizeof(p->note));
  86. p->privatemem = up->privatemem;
  87. p->noswap = up->noswap;
  88. p->nnote = up->nnote;
  89. p->notified = 0;
  90. p->lastnote = up->lastnote;
  91. p->notify = up->notify;
  92. p->ureg = up->ureg;
  93. p->dbgreg = 0;
  94. /* Make a new set of memory segments */
  95. n = flag & RFMEM;
  96. qlock(&p->seglock);
  97. if(waserror()){
  98. qunlock(&p->seglock);
  99. nexterror();
  100. }
  101. for(i = 0; i < NSEG; i++)
  102. if(up->seg[i])
  103. p->seg[i] = dupseg(up->seg, i, n);
  104. qunlock(&p->seglock);
  105. poperror();
  106. /* File descriptors */
  107. if(flag & (RFFDG|RFCFDG)) {
  108. if(flag & RFFDG)
  109. p->fgrp = dupfgrp(up->fgrp);
  110. else
  111. p->fgrp = dupfgrp(nil);
  112. }
  113. else {
  114. p->fgrp = up->fgrp;
  115. incref(p->fgrp);
  116. }
  117. /* Process groups */
  118. if(flag & (RFNAMEG|RFCNAMEG)) {
  119. p->pgrp = newpgrp();
  120. if(flag & RFNAMEG)
  121. pgrpcpy(p->pgrp, up->pgrp);
  122. /* inherit noattach */
  123. p->pgrp->noattach = up->pgrp->noattach;
  124. }
  125. else {
  126. p->pgrp = up->pgrp;
  127. incref(p->pgrp);
  128. }
  129. if(flag & RFNOMNT)
  130. up->pgrp->noattach = 1;
  131. if(flag & RFREND)
  132. p->rgrp = newrgrp();
  133. else {
  134. incref(up->rgrp);
  135. p->rgrp = up->rgrp;
  136. }
  137. /* Environment group */
  138. if(flag & (RFENVG|RFCENVG)) {
  139. p->egrp = smalloc(sizeof(Egrp));
  140. p->egrp->ref = 1;
  141. if(flag & RFENVG)
  142. envcpy(p->egrp, up->egrp);
  143. }
  144. else {
  145. p->egrp = up->egrp;
  146. incref(p->egrp);
  147. }
  148. p->hang = up->hang;
  149. p->procmode = up->procmode;
  150. /* Craft a return frame which will cause the child to pop out of
  151. * the scheduler in user mode with the return register zero
  152. */
  153. forkchild(p, up->dbgreg);
  154. p->parent = up;
  155. p->parentpid = up->pid;
  156. if(flag&RFNOWAIT)
  157. p->parentpid = 0;
  158. else {
  159. lock(&up->exl);
  160. up->nchild++;
  161. unlock(&up->exl);
  162. }
  163. if((flag&RFNOTEG) == 0)
  164. p->noteid = up->noteid;
  165. p->fpstate = up->fpstate;
  166. pid = p->pid;
  167. memset(p->time, 0, sizeof(p->time));
  168. p->time[TReal] = MACHP(0)->ticks;
  169. kstrdup(&p->text, up->text);
  170. kstrdup(&p->user, up->user);
  171. /*
  172. * since the bss/data segments are now shareable,
  173. * any mmu info about this process is now stale
  174. * (i.e. has bad properties) and has to be discarded.
  175. */
  176. flushmmu();
  177. p->basepri = up->basepri;
  178. p->priority = up->basepri;
  179. p->fixedpri = up->fixedpri;
  180. p->mp = up->mp;
  181. wm = up->wired;
  182. if(wm)
  183. procwired(p, wm->machno);
  184. ready(p);
  185. sched();
  186. return pid;
  187. }
  188. static ulong
  189. l2be(long l)
  190. {
  191. uchar *cp;
  192. cp = (uchar*)&l;
  193. return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
  194. }
  195. long
  196. sysexec(ulong *arg)
  197. {
  198. Segment *s, *ts;
  199. ulong t, d, b;
  200. int i;
  201. Chan *tc;
  202. char **argv, **argp;
  203. char *a, *charp, *args, *file;
  204. char *progarg[sizeof(Exec)/2+1], *elem, progelem[64];
  205. ulong ssize, spage, nargs, nbytes, n, bssend;
  206. int indir;
  207. Exec exec;
  208. char line[sizeof(Exec)];
  209. Fgrp *f;
  210. Image *img;
  211. ulong magic, text, entry, data, bss;
  212. Tos *tos;
  213. validaddr(arg[0], 1, 0);
  214. file = (char*)arg[0];
  215. indir = 0;
  216. elem = nil;
  217. if(waserror()){
  218. free(elem);
  219. nexterror();
  220. }
  221. for(;;){
  222. tc = namec(file, Aopen, OEXEC, 0);
  223. if(waserror()){
  224. cclose(tc);
  225. nexterror();
  226. }
  227. if(!indir)
  228. kstrdup(&elem, up->genbuf);
  229. n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
  230. if(n < 2)
  231. error(Ebadexec);
  232. magic = l2be(exec.magic);
  233. text = l2be(exec.text);
  234. entry = l2be(exec.entry);
  235. if(n==sizeof(Exec) && (magic == AOUT_MAGIC)){
  236. if(text >= USTKTOP-UTZERO
  237. || entry < UTZERO+sizeof(Exec)
  238. || entry >= UTZERO+sizeof(Exec)+text)
  239. error(Ebadexec);
  240. break; /* for binary */
  241. }
  242. /*
  243. * Process #! /bin/sh args ...
  244. */
  245. memmove(line, &exec, sizeof(Exec));
  246. if(indir || line[0]!='#' || line[1]!='!')
  247. error(Ebadexec);
  248. n = shargs(line, n, progarg);
  249. if(n == 0)
  250. error(Ebadexec);
  251. indir = 1;
  252. /*
  253. * First arg becomes complete file name
  254. */
  255. progarg[n++] = file;
  256. progarg[n] = 0;
  257. validaddr(arg[1], BY2WD, 1);
  258. arg[1] += BY2WD;
  259. file = progarg[0];
  260. if(strlen(elem) >= sizeof progelem)
  261. error(Ebadexec);
  262. strcpy(progelem, elem);
  263. progarg[0] = progelem;
  264. poperror();
  265. cclose(tc);
  266. }
  267. data = l2be(exec.data);
  268. bss = l2be(exec.bss);
  269. t = (UTZERO+sizeof(Exec)+text+(BY2PG-1)) & ~(BY2PG-1);
  270. d = (t + data + (BY2PG-1)) & ~(BY2PG-1);
  271. bssend = t + data + bss;
  272. b = (bssend + (BY2PG-1)) & ~(BY2PG-1);
  273. if(t >= KZERO || d >= KZERO || b >= KZERO)
  274. error(Ebadexec);
  275. /*
  276. * Args: pass 1: count
  277. */
  278. nbytes = sizeof(Tos); /* hole for profiling clock at top of stack (and more) */
  279. nargs = 0;
  280. if(indir){
  281. argp = progarg;
  282. while(*argp){
  283. a = *argp++;
  284. nbytes += strlen(a) + 1;
  285. nargs++;
  286. }
  287. }
  288. evenaddr(arg[1]);
  289. argp = (char**)arg[1];
  290. validaddr((ulong)argp, BY2WD, 0);
  291. while(*argp){
  292. a = *argp++;
  293. if(((ulong)argp&(BY2PG-1)) < BY2WD)
  294. validaddr((ulong)argp, BY2WD, 0);
  295. validaddr((ulong)a, 1, 0);
  296. nbytes += ((char*)vmemchr(a, 0, 0x7FFFFFFF) - a) + 1;
  297. nargs++;
  298. }
  299. ssize = BY2WD*(nargs+1) + ((nbytes+(BY2WD-1)) & ~(BY2WD-1));
  300. /*
  301. * 8-byte align SP for those (e.g. sparc) that need it.
  302. * execregs() will subtract another 4 bytes for argc.
  303. */
  304. if((ssize+4) & 7)
  305. ssize += 4;
  306. spage = (ssize+(BY2PG-1)) >> PGSHIFT;
  307. /*
  308. * Build the stack segment, putting it in kernel virtual for the moment
  309. */
  310. if(spage > TSTKSIZ)
  311. error(Enovmem);
  312. qlock(&up->seglock);
  313. if(waserror()){
  314. qunlock(&up->seglock);
  315. nexterror();
  316. }
  317. up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG);
  318. /*
  319. * Args: pass 2: assemble; the pages will be faulted in
  320. */
  321. tos = (Tos*)(TSTKTOP - sizeof(Tos));
  322. tos->cyclefreq = m->cyclefreq;
  323. cycles((uvlong*)&tos->pcycles);
  324. tos->pcycles = -tos->pcycles;
  325. tos->kcycles = tos->pcycles;
  326. tos->clock = 0;
  327. argv = (char**)(TSTKTOP - ssize);
  328. charp = (char*)(TSTKTOP - nbytes);
  329. args = charp;
  330. if(indir)
  331. argp = progarg;
  332. else
  333. argp = (char**)arg[1];
  334. for(i=0; i<nargs; i++){
  335. if(indir && *argp==0) {
  336. indir = 0;
  337. argp = (char**)arg[1];
  338. }
  339. *argv++ = charp + (USTKTOP-TSTKTOP);
  340. n = strlen(*argp) + 1;
  341. memmove(charp, *argp++, n);
  342. charp += n;
  343. }
  344. free(up->text);
  345. up->text = elem;
  346. elem = nil; /* so waserror() won't free elem */
  347. USED(elem);
  348. /* copy args; easiest from new process's stack */
  349. n = charp - args;
  350. if(n > 128) /* don't waste too much space on huge arg lists */
  351. n = 128;
  352. a = up->args;
  353. up->args = nil;
  354. free(a);
  355. up->args = smalloc(n);
  356. memmove(up->args, args, n);
  357. if(n>0 && up->args[n-1]!='\0'){
  358. /* make sure last arg is NUL-terminated */
  359. /* put NUL at UTF-8 character boundary */
  360. for(i=n-1; i>0; --i)
  361. if(fullrune(up->args+i, n-i))
  362. break;
  363. up->args[i] = 0;
  364. n = i+1;
  365. }
  366. up->nargs = n;
  367. /*
  368. * Committed.
  369. * Free old memory.
  370. * Special segments are maintained across exec
  371. */
  372. for(i = SSEG; i <= BSEG; i++) {
  373. putseg(up->seg[i]);
  374. /* prevent a second free if we have an error */
  375. up->seg[i] = 0;
  376. }
  377. for(i = BSEG+1; i < NSEG; i++) {
  378. s = up->seg[i];
  379. if(s != 0 && (s->type&SG_CEXEC)) {
  380. putseg(s);
  381. up->seg[i] = 0;
  382. }
  383. }
  384. /*
  385. * Close on exec
  386. */
  387. f = up->fgrp;
  388. for(i=0; i<=f->maxfd; i++)
  389. fdclose(i, CCEXEC);
  390. /* Text. Shared. Attaches to cache image if possible */
  391. /* attachimage returns a locked cache image */
  392. img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (t-UTZERO)>>PGSHIFT);
  393. ts = img->s;
  394. up->seg[TSEG] = ts;
  395. ts->flushme = 1;
  396. ts->fstart = 0;
  397. ts->flen = sizeof(Exec)+text;
  398. unlock(img);
  399. /* Data. Shared. */
  400. s = newseg(SG_DATA, t, (d-t)>>PGSHIFT);
  401. up->seg[DSEG] = s;
  402. /* Attached by hand */
  403. incref(img);
  404. s->image = img;
  405. s->fstart = ts->fstart+ts->flen;
  406. s->flen = data;
  407. /* BSS. Zero fill on demand */
  408. up->seg[BSEG] = newseg(SG_BSS, d, (b-d)>>PGSHIFT);
  409. /*
  410. * Move the stack
  411. */
  412. s = up->seg[ESEG];
  413. up->seg[ESEG] = 0;
  414. up->seg[SSEG] = s;
  415. qunlock(&up->seglock);
  416. poperror(); /* seglock */
  417. poperror(); /* elem */
  418. s->base = USTKTOP-USTKSIZE;
  419. s->top = USTKTOP;
  420. relocateseg(s, USTKTOP-TSTKTOP);
  421. /*
  422. * '/' processes are higher priority (hack to make /ip more responsive).
  423. */
  424. if(devtab[tc->type]->dc == L'/')
  425. up->basepri = PriRoot;
  426. up->priority = up->basepri;
  427. poperror();
  428. cclose(tc);
  429. /*
  430. * At this point, the mmu contains info about the old address
  431. * space and needs to be flushed
  432. */
  433. flushmmu();
  434. qlock(&up->debug);
  435. up->nnote = 0;
  436. up->notify = 0;
  437. up->notified = 0;
  438. up->privatemem = 0;
  439. procsetup(up);
  440. qunlock(&up->debug);
  441. if(up->hang)
  442. up->procctl = Proc_stopme;
  443. return execregs(entry, ssize, nargs);
  444. }
  /*
   * shargs: split the "#!" line held in s into blank/tab separated
   * words, in place.
   *
   *   s   buffer starting with the two characters "#!"; it is modified
   *       (the newline and word separators are overwritten with NULs)
   *   n   number of valid bytes in s, including the "#!"
   *   ap  receives pointers to the words, followed by a 0 entry
   *
   * Returns the number of words stored, or 0 if no newline is found
   * within the n valid bytes (this includes n <= 2, where nothing
   * follows the "#!").  The scan never looks at or writes to bytes
   * beyond s[n-1].
   */
  int
  shargs(char *s, int n, char **ap)
  {
      int i;

      s += 2;
      n -= 2;        /* skip #! */
      /* the terminating newline must lie inside the bytes actually read */
      for(i = 0; ; i++){
          if(i >= n)
              return 0;
          if(s[i] == '\n')
              break;
      }
      s[i] = 0;
      *ap = 0;
      i = 0;
      for(;;) {
          while(*s==' ' || *s=='\t')
              s++;
          if(*s == 0)
              break;
          i++;
          *ap++ = s;
          *ap = 0;
          while(*s && *s!=' ' && *s!='\t')
              s++;
          if(*s == 0)
              break;
          else
              *s++ = 0;
      }
      return i;
  }
  /*
   * return0: condition function that ignores its argument and always
   * answers 0.  syssleep() hands it to tsleep().
   */
  int
  return0(void*)
  {
      return 0;
  }
  479. long
  480. syssleep(ulong *arg)
  481. {
  482. int n;
  483. n = arg[0];
  484. if(n <= 0) {
  485. if (up->edf && (up->edf->flags & Admitted))
  486. edfyield();
  487. else
  488. yield();
  489. return 0;
  490. }
  491. if(n < TK2MS(1))
  492. n = TK2MS(1);
  493. tsleep(&up->sleep, return0, 0, n);
  494. return 0;
  495. }
  496. long
  497. sysalarm(ulong *arg)
  498. {
  499. return procalarm(arg[0]);
  500. }
  501. long
  502. sysexits(ulong *arg)
  503. {
  504. char *status;
  505. char *inval = "invalid exit string";
  506. char buf[ERRMAX];
  507. status = (char*)arg[0];
  508. if(status){
  509. if(waserror())
  510. status = inval;
  511. else{
  512. validaddr((ulong)status, 1, 0);
  513. if(vmemchr(status, 0, ERRMAX) == 0){
  514. memmove(buf, status, ERRMAX);
  515. buf[ERRMAX-1] = 0;
  516. status = buf;
  517. }
  518. poperror();
  519. }
  520. }
  521. pexit(status, 1);
  522. return 0; /* not reached */
  523. }
  524. long
  525. sys_wait(ulong *arg)
  526. {
  527. int pid;
  528. Waitmsg w;
  529. OWaitmsg *ow;
  530. if(arg[0] == 0)
  531. return pwait(nil);
  532. validaddr(arg[0], sizeof(OWaitmsg), 1);
  533. evenaddr(arg[0]);
  534. pid = pwait(&w);
  535. if(pid >= 0){
  536. ow = (OWaitmsg*)arg[0];
  537. readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
  538. readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
  539. readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
  540. readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
  541. strncpy(ow->msg, w.msg, sizeof(ow->msg));
  542. ow->msg[sizeof(ow->msg)-1] = '\0';
  543. }
  544. return pid;
  545. }
  546. long
  547. sysawait(ulong *arg)
  548. {
  549. int i;
  550. int pid;
  551. Waitmsg w;
  552. ulong n;
  553. n = arg[1];
  554. validaddr(arg[0], n, 1);
  555. pid = pwait(&w);
  556. if(pid < 0)
  557. return -1;
  558. i = snprint((char*)arg[0], n, "%d %lud %lud %lud %q",
  559. w.pid,
  560. w.time[TUser], w.time[TSys], w.time[TReal],
  561. w.msg);
  562. return i;
  563. }
  564. void
  565. werrstr(char *fmt, ...)
  566. {
  567. va_list va;
  568. if(up == nil)
  569. return;
  570. va_start(va, fmt);
  571. vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
  572. va_end(va);
  573. }
  574. static long
  575. generrstr(char *buf, uint nbuf)
  576. {
  577. char tmp[ERRMAX];
  578. if(nbuf == 0)
  579. error(Ebadarg);
  580. validaddr((ulong)buf, nbuf, 1);
  581. if(nbuf > sizeof tmp)
  582. nbuf = sizeof tmp;
  583. memmove(tmp, buf, nbuf);
  584. /* make sure it's NUL-terminated */
  585. tmp[nbuf-1] = '\0';
  586. memmove(buf, up->syserrstr, nbuf);
  587. buf[nbuf-1] = '\0';
  588. memmove(up->syserrstr, tmp, nbuf);
  589. return 0;
  590. }
  591. long
  592. syserrstr(ulong *arg)
  593. {
  594. return generrstr((char*)arg[0], arg[1]);
  595. }
  596. /* compatibility for old binaries */
  597. long
  598. sys_errstr(ulong *arg)
  599. {
  600. return generrstr((char*)arg[0], 64);
  601. }
  602. long
  603. sysnotify(ulong *arg)
  604. {
  605. if(arg[0] != 0)
  606. validaddr(arg[0], sizeof(ulong), 0);
  607. up->notify = (int(*)(void*, char*))(arg[0]);
  608. return 0;
  609. }
  610. long
  611. sysnoted(ulong *arg)
  612. {
  613. if(arg[0]!=NRSTR && !up->notified)
  614. error(Egreg);
  615. return 0;
  616. }
  617. long
  618. syssegbrk(ulong *arg)
  619. {
  620. int i;
  621. ulong addr;
  622. Segment *s;
  623. addr = arg[0];
  624. for(i = 0; i < NSEG; i++) {
  625. s = up->seg[i];
  626. if(s == 0 || addr < s->base || addr >= s->top)
  627. continue;
  628. switch(s->type&SG_TYPE) {
  629. case SG_TEXT:
  630. case SG_DATA:
  631. case SG_STACK:
  632. error(Ebadarg);
  633. default:
  634. return ibrk(arg[1], i);
  635. }
  636. }
  637. error(Ebadarg);
  638. return 0; /* not reached */
  639. }
  640. long
  641. syssegattach(ulong *arg)
  642. {
  643. return segattach(up, arg[0], (char*)arg[1], arg[2], arg[3]);
  644. }
  645. long
  646. syssegdetach(ulong *arg)
  647. {
  648. int i;
  649. ulong addr;
  650. Segment *s;
  651. qlock(&up->seglock);
  652. if(waserror()){
  653. qunlock(&up->seglock);
  654. nexterror();
  655. }
  656. s = 0;
  657. addr = arg[0];
  658. for(i = 0; i < NSEG; i++)
  659. if(s = up->seg[i]) {
  660. qlock(&s->lk);
  661. if((addr >= s->base && addr < s->top) ||
  662. (s->top == s->base && addr == s->base))
  663. goto found;
  664. qunlock(&s->lk);
  665. }
  666. error(Ebadarg);
  667. found:
  668. /*
  669. * Check we are not detaching the initial stack segment.
  670. */
  671. if(s == up->seg[SSEG]){
  672. qunlock(&s->lk);
  673. error(Ebadarg);
  674. }
  675. up->seg[i] = 0;
  676. qunlock(&s->lk);
  677. putseg(s);
  678. qunlock(&up->seglock);
  679. poperror();
  680. /* Ensure we flush any entries from the lost segment */
  681. flushmmu();
  682. return 0;
  683. }
  684. long
  685. syssegfree(ulong *arg)
  686. {
  687. Segment *s;
  688. ulong from, to;
  689. from = arg[0];
  690. s = seg(up, from, 1);
  691. if(s == nil)
  692. error(Ebadarg);
  693. to = (from + arg[1]) & ~(BY2PG-1);
  694. from = PGROUND(from);
  695. if(to > s->top) {
  696. qunlock(&s->lk);
  697. error(Ebadarg);
  698. }
  699. mfreeseg(s, from, (to - from) / BY2PG);
  700. qunlock(&s->lk);
  701. flushmmu();
  702. return 0;
  703. }
  704. /* For binary compatibility */
  705. long
  706. sysbrk_(ulong *arg)
  707. {
  708. return ibrk(arg[0], BSEG);
  709. }
  710. long
  711. sysrendezvous(ulong *arg)
  712. {
  713. uintptr tag, val;
  714. Proc *p, **l;
  715. tag = arg[0];
  716. l = &REND(up->rgrp, tag);
  717. up->rendval = ~(uintptr)0;
  718. lock(up->rgrp);
  719. for(p = *l; p; p = p->rendhash) {
  720. if(p->rendtag == tag) {
  721. *l = p->rendhash;
  722. val = p->rendval;
  723. p->rendval = arg[1];
  724. while(p->mach != 0)
  725. ;
  726. ready(p);
  727. unlock(up->rgrp);
  728. return val;
  729. }
  730. l = &p->rendhash;
  731. }
  732. /* Going to sleep here */
  733. up->rendtag = tag;
  734. up->rendval = arg[1];
  735. up->rendhash = *l;
  736. *l = up;
  737. up->state = Rendezvous;
  738. unlock(up->rgrp);
  739. sched();
  740. return up->rendval;
  741. }
  742. /*
  743. * The implementation of semaphores is complicated by needing
  744. * to avoid rescheduling in syssemrelease, so that it is safe
  745. * to call from real-time processes. This means syssemrelease
  746. * cannot acquire any qlocks, only spin locks.
  747. *
  748. * Semacquire and semrelease must both manipulate the semaphore
  749. * wait list. Lock-free linked lists only exist in theory, not
  750. * in practice, so the wait list is protected by a spin lock.
  751. *
  752. * The semaphore value *addr is stored in user memory, so it
  753. * cannot be read or written while holding spin locks.
  754. *
  755. * Thus, we can access the list only when holding the lock, and
  756. * we can access the semaphore only when not holding the lock.
  757. * This makes things interesting. Note that sleep's condition function
  758. * is called while holding two locks - r and up->rlock - so it cannot
  759. * access the semaphore value either.
  760. *
  761. * An acquirer announces its intention to try for the semaphore
  762. * by putting a Sema structure onto the wait list and then
  763. * setting Sema.waiting. After one last check of semaphore,
  764. * the acquirer sleeps until Sema.waiting==0. A releaser of n
  765. * must wake up n acquirers who have Sema.waiting set. It does
  766. * this by clearing Sema.waiting and then calling wakeup.
  767. *
  768. * There are three interesting races here.
  769. * The first is that in this particular sleep/wakeup usage, a single
  770. * wakeup can rouse a process from two consecutive sleeps!
  771. * The ordering is:
  772. *
  773. * (a) set Sema.waiting = 1
  774. * (a) call sleep
  775. * (b) set Sema.waiting = 0
  776. * (a) check Sema.waiting inside sleep, return w/o sleeping
  777. * (a) try for semaphore, fail
  778. * (a) set Sema.waiting = 1
  779. * (a) call sleep
  780. * (b) call wakeup(a)
  781. * (a) wake up again
  782. *
  783. * This is okay - semacquire will just go around the loop
  784. * again. It does mean that at the top of the for(;;) loop in
  785. * semacquire, phore.waiting might already be set to 1.
  786. *
  787. * The second is that a releaser might wake an acquirer who is
  788. * interrupted before he can acquire the lock. Since
  789. * release(n) issues only n wakeup calls -- only n can be used
  790. * anyway -- if the interrupted process is not going to use his
  791. * wakeup call he must pass it on to another acquirer.
  792. *
  793. * The third race is similar to the second but more subtle. An
  794. * acquirer sets waiting=1 and then does a final canacquire()
  795. * before going to sleep. The opposite order would result in
  796. * missing wakeups that happen between canacquire and
  797. * waiting=1. (In fact, the whole point of Sema.waiting is to
  798. * avoid missing wakeups between canacquire() and sleep().) But
  799. * there can be spurious wakeups between a successful
  800. * canacquire() and the following semdequeue(). This wakeup is
  801. * not useful to the acquirer, since he has already acquired
  802. * the semaphore. Like in the previous case, though, the
  803. * acquirer must pass the wakeup call along.
  804. *
  805. * This is all rather subtle. The code below has been verified
  806. * with the spin model /sys/src/9/port/semaphore.p. The
  807. * original code anticipated the second race but not the first
  808. * or third, which were caught only with spin. The first race
  809. * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
  810. * It was lucky that my abstract model of sleep/wakeup still managed
  811. * to preserve that behavior.
  812. *
  813. * I remain slightly concerned about memory coherence
  814. * outside of locks. The spin model does not take
  815. * queued processor writes into account so we have to
  816. * think hard. The only variables accessed outside locks
  817. * are the semaphore value itself and the boolean flag
  818. * Sema.waiting. The value is only accessed with cmpswap,
  819. * whose job description includes doing the right thing as
  820. * far as memory coherence across processors. That leaves
  821. * Sema.waiting. To handle it, we call coherence() before each
  822. * read and after each write. - rsc
  823. */
  824. /* Add semaphore p with addr a to list in seg. */
  825. static void
  826. semqueue(Segment *s, long *a, Sema *p)
  827. {
  828. memset(p, 0, sizeof *p);
  829. p->addr = a;
  830. lock(&s->sema); /* uses s->sema.Rendez.Lock, but no one else is */
  831. p->next = &s->sema;
  832. p->prev = s->sema.prev;
  833. p->next->prev = p;
  834. p->prev->next = p;
  835. unlock(&s->sema);
  836. }
  837. /* Remove semaphore p from list in seg. */
  838. static void
  839. semdequeue(Segment *s, Sema *p)
  840. {
  841. lock(&s->sema);
  842. p->next->prev = p->prev;
  843. p->prev->next = p->next;
  844. unlock(&s->sema);
  845. }
  846. /* Wake up n waiters with addr a on list in seg. */
  847. static void
  848. semwakeup(Segment *s, long *a, long n)
  849. {
  850. Sema *p;
  851. lock(&s->sema);
  852. for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
  853. if(p->addr == a && p->waiting){
  854. p->waiting = 0;
  855. coherence();
  856. wakeup(p);
  857. n--;
  858. }
  859. }
  860. unlock(&s->sema);
  861. }
  862. /* Add delta to semaphore and wake up waiters as appropriate. */
  863. static long
  864. semrelease(Segment *s, long *addr, long delta)
  865. {
  866. long value;
  867. do
  868. value = *addr;
  869. while(!cmpswap(addr, value, value+delta));
  870. semwakeup(s, addr, delta);
  871. return value+delta;
  872. }
  /*
   * canacquire: try to take one unit from the semaphore at addr using
   * compare-and-swap.  Returns 1 after decrementing a positive value,
   * 0 as soon as the value is seen to be <= 0.
   */
  static int
  canacquire(long *addr)
  {
      long value;

      for(;;){
          value = *addr;
          if(value <= 0)
              return 0;
          if(cmpswap(addr, value, value-1))
              return 1;
      }
  }
  883. /* Should we wake up? */
  884. static int
  885. semawoke(void *p)
  886. {
  887. coherence();
  888. return !((Sema*)p)->waiting;
  889. }
  890. /* Acquire semaphore (subtract 1). */
  891. static int
  892. semacquire(Segment *s, long *addr, int block)
  893. {
  894. int acquired;
  895. Sema phore;
  896. if(canacquire(addr))
  897. return 1;
  898. if(!block)
  899. return 0;
  900. acquired = 0;
  901. semqueue(s, addr, &phore);
  902. for(;;){
  903. phore.waiting = 1;
  904. coherence();
  905. if(canacquire(addr)){
  906. acquired = 1;
  907. break;
  908. }
  909. if(waserror())
  910. break;
  911. sleep(&phore, semawoke, &phore);
  912. poperror();
  913. }
  914. semdequeue(s, &phore);
  915. coherence(); /* not strictly necessary due to lock in semdequeue */
  916. if(!phore.waiting)
  917. semwakeup(s, addr, 1);
  918. if(!acquired)
  919. nexterror();
  920. return 1;
  921. }
  922. long
  923. syssemacquire(ulong *arg)
  924. {
  925. int block;
  926. long *addr;
  927. Segment *s;
  928. validaddr(arg[0], sizeof(long), 1);
  929. evenaddr(arg[0]);
  930. addr = (long*)arg[0];
  931. block = arg[1];
  932. if((s = seg(up, (ulong)addr, 0)) == nil)
  933. error(Ebadarg);
  934. if(*addr < 0)
  935. error(Ebadarg);
  936. return semacquire(s, addr, block);
  937. }
  938. long
  939. syssemrelease(ulong *arg)
  940. {
  941. long *addr, delta;
  942. Segment *s;
  943. validaddr(arg[0], sizeof(long), 1);
  944. evenaddr(arg[0]);
  945. addr = (long*)arg[0];
  946. delta = arg[1];
  947. if((s = seg(up, (ulong)addr, 0)) == nil)
  948. error(Ebadarg);
  949. if(delta < 0 || *addr < 0)
  950. error(Ebadarg);
  951. return semrelease(s, addr, arg[1]);
  952. }