sysproc.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133
  1. #include "u.h"
  2. #include "tos.h"
  3. #include "../port/lib.h"
  4. #include "mem.h"
  5. #include "dat.h"
  6. #include "fns.h"
  7. #include "../port/error.h"
  8. #include "edf.h"
  9. #include <a.out.h>
  10. int shargs(char*, int, char**);
  11. extern void checkpages(void);
  12. extern void checkpagerefs(void);
  13. long
  14. sysr1(ulong*)
  15. {
  16. checkpagerefs();
  17. return 0;
  18. }
  19. long
  20. sysrfork(ulong *arg)
  21. {
  22. Proc *p;
  23. int n, i;
  24. Fgrp *ofg;
  25. Pgrp *opg;
  26. Rgrp *org;
  27. Egrp *oeg;
  28. ulong pid, flag;
  29. Mach *wm;
  30. flag = arg[0];
  31. /* Check flags before we commit */
  32. if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
  33. error(Ebadarg);
  34. if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
  35. error(Ebadarg);
  36. if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
  37. error(Ebadarg);
  38. if((flag&RFPROC) == 0) {
  39. if(flag & (RFMEM|RFNOWAIT))
  40. error(Ebadarg);
  41. if(flag & (RFFDG|RFCFDG)) {
  42. ofg = up->fgrp;
  43. if(flag & RFFDG)
  44. up->fgrp = dupfgrp(ofg);
  45. else
  46. up->fgrp = dupfgrp(nil);
  47. closefgrp(ofg);
  48. }
  49. if(flag & (RFNAMEG|RFCNAMEG)) {
  50. opg = up->pgrp;
  51. up->pgrp = newpgrp();
  52. if(flag & RFNAMEG)
  53. pgrpcpy(up->pgrp, opg);
  54. /* inherit noattach */
  55. up->pgrp->noattach = opg->noattach;
  56. closepgrp(opg);
  57. }
  58. if(flag & RFNOMNT)
  59. up->pgrp->noattach = 1;
  60. if(flag & RFREND) {
  61. org = up->rgrp;
  62. up->rgrp = newrgrp();
  63. closergrp(org);
  64. }
  65. if(flag & (RFENVG|RFCENVG)) {
  66. oeg = up->egrp;
  67. up->egrp = smalloc(sizeof(Egrp));
  68. up->egrp->ref = 1;
  69. if(flag & RFENVG)
  70. envcpy(up->egrp, oeg);
  71. closeegrp(oeg);
  72. }
  73. if(flag & RFNOTEG)
  74. up->noteid = incref(&noteidalloc);
  75. return 0;
  76. }
  77. p = newproc();
  78. p->fpsave = up->fpsave;
  79. p->scallnr = up->scallnr;
  80. p->s = up->s;
  81. p->nerrlab = 0;
  82. p->slash = up->slash;
  83. p->dot = up->dot;
  84. incref(p->dot);
  85. memmove(p->note, up->note, sizeof(p->note));
  86. p->privatemem = up->privatemem;
  87. p->noswap = up->noswap;
  88. p->nnote = up->nnote;
  89. p->notified = 0;
  90. p->lastnote = up->lastnote;
  91. p->notify = up->notify;
  92. p->ureg = up->ureg;
  93. p->dbgreg = 0;
  94. /* Make a new set of memory segments */
  95. n = flag & RFMEM;
  96. qlock(&p->seglock);
  97. if(waserror()){
  98. qunlock(&p->seglock);
  99. nexterror();
  100. }
  101. for(i = 0; i < NSEG; i++)
  102. if(up->seg[i])
  103. p->seg[i] = dupseg(up->seg, i, n);
  104. qunlock(&p->seglock);
  105. poperror();
  106. /* File descriptors */
  107. if(flag & (RFFDG|RFCFDG)) {
  108. if(flag & RFFDG)
  109. p->fgrp = dupfgrp(up->fgrp);
  110. else
  111. p->fgrp = dupfgrp(nil);
  112. }
  113. else {
  114. p->fgrp = up->fgrp;
  115. incref(p->fgrp);
  116. }
  117. /* Process groups */
  118. if(flag & (RFNAMEG|RFCNAMEG)) {
  119. p->pgrp = newpgrp();
  120. if(flag & RFNAMEG)
  121. pgrpcpy(p->pgrp, up->pgrp);
  122. /* inherit noattach */
  123. p->pgrp->noattach = up->pgrp->noattach;
  124. }
  125. else {
  126. p->pgrp = up->pgrp;
  127. incref(p->pgrp);
  128. }
  129. if(flag & RFNOMNT)
  130. p->pgrp->noattach = 1;
  131. if(flag & RFREND)
  132. p->rgrp = newrgrp();
  133. else {
  134. incref(up->rgrp);
  135. p->rgrp = up->rgrp;
  136. }
  137. /* Environment group */
  138. if(flag & (RFENVG|RFCENVG)) {
  139. p->egrp = smalloc(sizeof(Egrp));
  140. p->egrp->ref = 1;
  141. if(flag & RFENVG)
  142. envcpy(p->egrp, up->egrp);
  143. }
  144. else {
  145. p->egrp = up->egrp;
  146. incref(p->egrp);
  147. }
  148. p->hang = up->hang;
  149. p->procmode = up->procmode;
  150. /* Craft a return frame which will cause the child to pop out of
  151. * the scheduler in user mode with the return register zero
  152. */
  153. forkchild(p, up->dbgreg);
  154. p->parent = up;
  155. p->parentpid = up->pid;
  156. if(flag&RFNOWAIT)
  157. p->parentpid = 0;
  158. else {
  159. lock(&up->exl);
  160. up->nchild++;
  161. unlock(&up->exl);
  162. }
  163. if((flag&RFNOTEG) == 0)
  164. p->noteid = up->noteid;
  165. /* don't penalize the child, it hasn't done FP in a note handler. */
  166. p->fpstate = up->fpstate & ~FPillegal;
  167. pid = p->pid;
  168. memset(p->time, 0, sizeof(p->time));
  169. p->time[TReal] = MACHP(0)->ticks;
  170. kstrdup(&p->text, up->text);
  171. kstrdup(&p->user, up->user);
  172. /*
  173. * since the bss/data segments are now shareable,
  174. * any mmu info about this process is now stale
  175. * (i.e. has bad properties) and has to be discarded.
  176. */
  177. flushmmu();
  178. p->basepri = up->basepri;
  179. p->priority = up->basepri;
  180. p->fixedpri = up->fixedpri;
  181. p->mp = up->mp;
  182. wm = up->wired;
  183. if(wm)
  184. procwired(p, wm->machno);
  185. ready(p);
  186. sched();
  187. return pid;
  188. }
  189. static ulong
  190. l2be(long l)
  191. {
  192. uchar *cp;
  193. cp = (uchar*)&l;
  194. return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
  195. }
  196. long
  197. sysexec(ulong *arg)
  198. {
  199. Segment *s, *ts;
  200. ulong t, d, b;
  201. int i;
  202. Chan *tc;
  203. char **argv, **argp;
  204. char *a, *charp, *args, *file, *file0;
  205. char *progarg[sizeof(Exec)/2+1], *elem, progelem[64];
  206. ulong ssize, spage, nargs, nbytes, n, bssend;
  207. int indir;
  208. Exec exec;
  209. char line[sizeof(Exec)];
  210. Fgrp *f;
  211. Image *img;
  212. ulong magic, text, entry, data, bss;
  213. Tos *tos;
  214. indir = 0;
  215. elem = nil;
  216. validaddr(arg[0], 1, 0);
  217. file0 = validnamedup((char*)arg[0], 1);
  218. if(waserror()){
  219. free(file0);
  220. free(elem);
  221. nexterror();
  222. }
  223. file = file0;
  224. for(;;){
  225. tc = namec(file, Aopen, OEXEC, 0);
  226. if(waserror()){
  227. cclose(tc);
  228. nexterror();
  229. }
  230. if(!indir)
  231. kstrdup(&elem, up->genbuf);
  232. n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
  233. if(n < 2)
  234. error(Ebadexec);
  235. magic = l2be(exec.magic);
  236. text = l2be(exec.text);
  237. entry = l2be(exec.entry);
  238. if(n==sizeof(Exec) && (magic == AOUT_MAGIC)){
  239. if(text >= USTKTOP-UTZERO
  240. || entry < UTZERO+sizeof(Exec)
  241. || entry >= UTZERO+sizeof(Exec)+text)
  242. error(Ebadexec);
  243. break; /* for binary */
  244. }
  245. /*
  246. * Process #! /bin/sh args ...
  247. */
  248. memmove(line, &exec, sizeof(Exec));
  249. if(indir || line[0]!='#' || line[1]!='!')
  250. error(Ebadexec);
  251. n = shargs(line, n, progarg);
  252. if(n == 0)
  253. error(Ebadexec);
  254. indir = 1;
  255. /*
  256. * First arg becomes complete file name
  257. */
  258. progarg[n++] = file;
  259. progarg[n] = 0;
  260. validaddr(arg[1], BY2WD, 1);
  261. arg[1] += BY2WD;
  262. file = progarg[0];
  263. if(strlen(elem) >= sizeof progelem)
  264. error(Ebadexec);
  265. strcpy(progelem, elem);
  266. progarg[0] = progelem;
  267. poperror();
  268. cclose(tc);
  269. }
  270. data = l2be(exec.data);
  271. bss = l2be(exec.bss);
  272. t = (UTZERO+sizeof(Exec)+text+(BY2PG-1)) & ~(BY2PG-1);
  273. d = (t + data + (BY2PG-1)) & ~(BY2PG-1);
  274. bssend = t + data + bss;
  275. b = (bssend + (BY2PG-1)) & ~(BY2PG-1);
  276. if(t >= KZERO || d >= KZERO || b >= KZERO)
  277. error(Ebadexec);
  278. /*
  279. * Args: pass 1: count
  280. */
  281. nbytes = sizeof(Tos); /* hole for profiling clock at top of stack (and more) */
  282. nargs = 0;
  283. if(indir){
  284. argp = progarg;
  285. while(*argp){
  286. a = *argp++;
  287. nbytes += strlen(a) + 1;
  288. nargs++;
  289. }
  290. }
  291. evenaddr(arg[1]);
  292. argp = (char**)arg[1];
  293. validaddr((ulong)argp, BY2WD, 0);
  294. while(*argp){
  295. a = *argp++;
  296. if(((ulong)argp&(BY2PG-1)) < BY2WD)
  297. validaddr((ulong)argp, BY2WD, 0);
  298. validaddr((ulong)a, 1, 0);
  299. nbytes += ((char*)vmemchr(a, 0, 0x7FFFFFFF) - a) + 1;
  300. nargs++;
  301. }
  302. ssize = BY2WD*(nargs+1) + ((nbytes+(BY2WD-1)) & ~(BY2WD-1));
  303. /*
  304. * 8-byte align SP for those (e.g. sparc) that need it.
  305. * execregs() will subtract another 4 bytes for argc.
  306. */
  307. if((ssize+4) & 7)
  308. ssize += 4;
  309. spage = (ssize+(BY2PG-1)) >> PGSHIFT;
  310. /*
  311. * Build the stack segment, putting it in kernel virtual for the moment
  312. */
  313. if(spage > TSTKSIZ)
  314. error(Enovmem);
  315. qlock(&up->seglock);
  316. if(waserror()){
  317. qunlock(&up->seglock);
  318. nexterror();
  319. }
  320. up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG);
  321. /*
  322. * Args: pass 2: assemble; the pages will be faulted in
  323. */
  324. tos = (Tos*)(TSTKTOP - sizeof(Tos));
  325. tos->cyclefreq = m->cyclefreq;
  326. cycles((uvlong*)&tos->pcycles);
  327. tos->pcycles = -tos->pcycles;
  328. tos->kcycles = tos->pcycles;
  329. tos->clock = 0;
  330. argv = (char**)(TSTKTOP - ssize);
  331. charp = (char*)(TSTKTOP - nbytes);
  332. args = charp;
  333. if(indir)
  334. argp = progarg;
  335. else
  336. argp = (char**)arg[1];
  337. for(i=0; i<nargs; i++){
  338. if(indir && *argp==0) {
  339. indir = 0;
  340. argp = (char**)arg[1];
  341. }
  342. *argv++ = charp + (USTKTOP-TSTKTOP);
  343. n = strlen(*argp) + 1;
  344. memmove(charp, *argp++, n);
  345. charp += n;
  346. }
  347. free(file0);
  348. free(up->text);
  349. up->text = elem;
  350. elem = nil; /* so waserror() won't free elem */
  351. USED(elem);
  352. /* copy args; easiest from new process's stack */
  353. n = charp - args;
  354. if(n > 128) /* don't waste too much space on huge arg lists */
  355. n = 128;
  356. a = up->args;
  357. up->args = nil;
  358. free(a);
  359. up->args = smalloc(n);
  360. memmove(up->args, args, n);
  361. if(n>0 && up->args[n-1]!='\0'){
  362. /* make sure last arg is NUL-terminated */
  363. /* put NUL at UTF-8 character boundary */
  364. for(i=n-1; i>0; --i)
  365. if(fullrune(up->args+i, n-i))
  366. break;
  367. up->args[i] = 0;
  368. n = i+1;
  369. }
  370. up->nargs = n;
  371. /*
  372. * Committed.
  373. * Free old memory.
  374. * Special segments are maintained across exec
  375. */
  376. for(i = SSEG; i <= BSEG; i++) {
  377. putseg(up->seg[i]);
  378. /* prevent a second free if we have an error */
  379. up->seg[i] = 0;
  380. }
  381. for(i = BSEG+1; i < NSEG; i++) {
  382. s = up->seg[i];
  383. if(s != 0 && (s->type&SG_CEXEC)) {
  384. putseg(s);
  385. up->seg[i] = 0;
  386. }
  387. }
  388. /*
  389. * Close on exec
  390. */
  391. f = up->fgrp;
  392. for(i=0; i<=f->maxfd; i++)
  393. fdclose(i, CCEXEC);
  394. /* Text. Shared. Attaches to cache image if possible */
  395. /* attachimage returns a locked cache image */
  396. img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (t-UTZERO)>>PGSHIFT);
  397. ts = img->s;
  398. up->seg[TSEG] = ts;
  399. ts->flushme = 1;
  400. ts->fstart = 0;
  401. ts->flen = sizeof(Exec)+text;
  402. unlock(img);
  403. /* Data. Shared. */
  404. s = newseg(SG_DATA, t, (d-t)>>PGSHIFT);
  405. up->seg[DSEG] = s;
  406. /* Attached by hand */
  407. incref(img);
  408. s->image = img;
  409. s->fstart = ts->fstart+ts->flen;
  410. s->flen = data;
  411. /* BSS. Zero fill on demand */
  412. up->seg[BSEG] = newseg(SG_BSS, d, (b-d)>>PGSHIFT);
  413. /*
  414. * Move the stack
  415. */
  416. s = up->seg[ESEG];
  417. up->seg[ESEG] = 0;
  418. up->seg[SSEG] = s;
  419. qunlock(&up->seglock);
  420. poperror(); /* seglock */
  421. poperror(); /* elem */
  422. s->base = USTKTOP-USTKSIZE;
  423. s->top = USTKTOP;
  424. relocateseg(s, USTKTOP-TSTKTOP);
  425. /*
  426. * '/' processes are higher priority (hack to make /ip more responsive).
  427. */
  428. if(devtab[tc->type]->dc == L'/')
  429. up->basepri = PriRoot;
  430. up->priority = up->basepri;
  431. poperror();
  432. cclose(tc);
  433. /*
  434. * At this point, the mmu contains info about the old address
  435. * space and needs to be flushed
  436. */
  437. flushmmu();
  438. qlock(&up->debug);
  439. up->nnote = 0;
  440. up->notify = 0;
  441. up->notified = 0;
  442. up->privatemem = 0;
  443. procsetup(up);
  444. qunlock(&up->debug);
  445. if(up->hang)
  446. up->procctl = Proc_stopme;
  447. return execregs(entry, ssize, nargs);
  448. }
  /*
   * shargs: split a "#!" interpreter line into arguments.
   *
   * s points at n bytes beginning with "#!".  The text after "#!"
   * must contain a newline within those n bytes; it is cut at the
   * newline and split in place on blanks and tabs (separators are
   * overwritten with NUL).  Pointers to the words are stored in
   * ap[], followed by a terminating 0 entry.
   *
   * Returns the number of words, or 0 if there is no newline within
   * the n bytes (including the case where nothing follows "#!") or
   * the line holds no words.  The scan never looks beyond s[n-1].
   * ap[] is left untouched when no newline is found.
   */
  int
  shargs(char *s, int n, char **ap)
  {
      int i;

      s += 2;
      n -= 2; /* skip #! */

      /* find the newline, staying inside the bytes we were given */
      for(i = 0; i < n; i++)
          if(s[i] == '\n')
              break;
      if(i >= n)
          return 0;
      s[i] = 0;

      *ap = 0;
      i = 0;
      for(;;) {
          /* skip separators before the next word */
          while(*s==' ' || *s=='\t')
              s++;
          if(*s == 0)
              break;
          i++;
          *ap++ = s;
          *ap = 0;
          /* advance to the end of the word and terminate it */
          while(*s && *s!=' ' && *s!='\t')
              s++;
          if(*s == 0)
              break;
          *s++ = 0;
      }
      return i;
  }
  /*
   * return0: always returns 0 and ignores its argument.
   * syssleep passes it to tsleep() as the condition function.
   */
  int
  return0(void*)
  {
      return 0;
  }
  483. long
  484. syssleep(ulong *arg)
  485. {
  486. int n;
  487. n = arg[0];
  488. if(n <= 0) {
  489. if (up->edf && (up->edf->flags & Admitted))
  490. edfyield();
  491. else
  492. yield();
  493. return 0;
  494. }
  495. if(n < TK2MS(1))
  496. n = TK2MS(1);
  497. tsleep(&up->sleep, return0, 0, n);
  498. return 0;
  499. }
  500. long
  501. sysalarm(ulong *arg)
  502. {
  503. return procalarm(arg[0]);
  504. }
  505. long
  506. sysexits(ulong *arg)
  507. {
  508. char *status;
  509. char *inval = "invalid exit string";
  510. char buf[ERRMAX];
  511. status = (char*)arg[0];
  512. if(status){
  513. if(waserror())
  514. status = inval;
  515. else{
  516. validaddr((ulong)status, 1, 0);
  517. if(vmemchr(status, 0, ERRMAX) == 0){
  518. memmove(buf, status, ERRMAX);
  519. buf[ERRMAX-1] = 0;
  520. status = buf;
  521. }
  522. poperror();
  523. }
  524. }
  525. pexit(status, 1);
  526. return 0; /* not reached */
  527. }
  528. long
  529. sys_wait(ulong *arg)
  530. {
  531. int pid;
  532. Waitmsg w;
  533. OWaitmsg *ow;
  534. if(arg[0] == 0)
  535. return pwait(nil);
  536. validaddr(arg[0], sizeof(OWaitmsg), 1);
  537. evenaddr(arg[0]);
  538. pid = pwait(&w);
  539. if(pid >= 0){
  540. ow = (OWaitmsg*)arg[0];
  541. readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
  542. readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
  543. readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
  544. readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
  545. strncpy(ow->msg, w.msg, sizeof(ow->msg));
  546. ow->msg[sizeof(ow->msg)-1] = '\0';
  547. }
  548. return pid;
  549. }
  550. long
  551. sysawait(ulong *arg)
  552. {
  553. int i;
  554. int pid;
  555. Waitmsg w;
  556. ulong n;
  557. n = arg[1];
  558. validaddr(arg[0], n, 1);
  559. pid = pwait(&w);
  560. if(pid < 0)
  561. return -1;
  562. i = snprint((char*)arg[0], n, "%d %lud %lud %lud %q",
  563. w.pid,
  564. w.time[TUser], w.time[TSys], w.time[TReal],
  565. w.msg);
  566. return i;
  567. }
  568. void
  569. werrstr(char *fmt, ...)
  570. {
  571. va_list va;
  572. if(up == nil)
  573. return;
  574. va_start(va, fmt);
  575. vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
  576. va_end(va);
  577. }
  578. static long
  579. generrstr(char *buf, uint nbuf)
  580. {
  581. char tmp[ERRMAX];
  582. if(nbuf == 0)
  583. error(Ebadarg);
  584. validaddr((ulong)buf, nbuf, 1);
  585. if(nbuf > sizeof tmp)
  586. nbuf = sizeof tmp;
  587. memmove(tmp, buf, nbuf);
  588. /* make sure it's NUL-terminated */
  589. tmp[nbuf-1] = '\0';
  590. memmove(buf, up->syserrstr, nbuf);
  591. buf[nbuf-1] = '\0';
  592. memmove(up->syserrstr, tmp, nbuf);
  593. return 0;
  594. }
  595. long
  596. syserrstr(ulong *arg)
  597. {
  598. return generrstr((char*)arg[0], arg[1]);
  599. }
  600. /* compatibility for old binaries */
  601. long
  602. sys_errstr(ulong *arg)
  603. {
  604. return generrstr((char*)arg[0], 64);
  605. }
  606. long
  607. sysnotify(ulong *arg)
  608. {
  609. if(arg[0] != 0)
  610. validaddr(arg[0], sizeof(ulong), 0);
  611. up->notify = (int(*)(void*, char*))(arg[0]);
  612. return 0;
  613. }
  614. long
  615. sysnoted(ulong *arg)
  616. {
  617. if(arg[0]!=NRSTR && !up->notified)
  618. error(Egreg);
  619. return 0;
  620. }
  621. long
  622. syssegbrk(ulong *arg)
  623. {
  624. int i;
  625. ulong addr;
  626. Segment *s;
  627. addr = arg[0];
  628. for(i = 0; i < NSEG; i++) {
  629. s = up->seg[i];
  630. if(s == 0 || addr < s->base || addr >= s->top)
  631. continue;
  632. switch(s->type&SG_TYPE) {
  633. case SG_TEXT:
  634. case SG_DATA:
  635. case SG_STACK:
  636. error(Ebadarg);
  637. default:
  638. return ibrk(arg[1], i);
  639. }
  640. }
  641. error(Ebadarg);
  642. return 0; /* not reached */
  643. }
  644. long
  645. syssegattach(ulong *arg)
  646. {
  647. return segattach(up, arg[0], (char*)arg[1], arg[2], arg[3]);
  648. }
  649. long
  650. syssegdetach(ulong *arg)
  651. {
  652. int i;
  653. ulong addr;
  654. Segment *s;
  655. qlock(&up->seglock);
  656. if(waserror()){
  657. qunlock(&up->seglock);
  658. nexterror();
  659. }
  660. s = 0;
  661. addr = arg[0];
  662. for(i = 0; i < NSEG; i++)
  663. if(s = up->seg[i]) {
  664. qlock(&s->lk);
  665. if((addr >= s->base && addr < s->top) ||
  666. (s->top == s->base && addr == s->base))
  667. goto found;
  668. qunlock(&s->lk);
  669. }
  670. error(Ebadarg);
  671. found:
  672. /*
  673. * Check we are not detaching the initial stack segment.
  674. */
  675. if(s == up->seg[SSEG]){
  676. qunlock(&s->lk);
  677. error(Ebadarg);
  678. }
  679. up->seg[i] = 0;
  680. qunlock(&s->lk);
  681. putseg(s);
  682. qunlock(&up->seglock);
  683. poperror();
  684. /* Ensure we flush any entries from the lost segment */
  685. flushmmu();
  686. return 0;
  687. }
  688. long
  689. syssegfree(ulong *arg)
  690. {
  691. Segment *s;
  692. ulong from, to;
  693. from = arg[0];
  694. s = seg(up, from, 1);
  695. if(s == nil)
  696. error(Ebadarg);
  697. to = (from + arg[1]) & ~(BY2PG-1);
  698. from = PGROUND(from);
  699. if(to > s->top) {
  700. qunlock(&s->lk);
  701. error(Ebadarg);
  702. }
  703. mfreeseg(s, from, (to - from) / BY2PG);
  704. qunlock(&s->lk);
  705. flushmmu();
  706. return 0;
  707. }
  708. /* For binary compatibility */
  709. long
  710. sysbrk_(ulong *arg)
  711. {
  712. return ibrk(arg[0], BSEG);
  713. }
  714. long
  715. sysrendezvous(ulong *arg)
  716. {
  717. uintptr tag, val;
  718. Proc *p, **l;
  719. tag = arg[0];
  720. l = &REND(up->rgrp, tag);
  721. up->rendval = ~(uintptr)0;
  722. lock(up->rgrp);
  723. for(p = *l; p; p = p->rendhash) {
  724. if(p->rendtag == tag) {
  725. *l = p->rendhash;
  726. val = p->rendval;
  727. p->rendval = arg[1];
  728. while(p->mach != 0)
  729. ;
  730. ready(p);
  731. unlock(up->rgrp);
  732. return val;
  733. }
  734. l = &p->rendhash;
  735. }
  736. /* Going to sleep here */
  737. up->rendtag = tag;
  738. up->rendval = arg[1];
  739. up->rendhash = *l;
  740. *l = up;
  741. up->state = Rendezvous;
  742. unlock(up->rgrp);
  743. sched();
  744. return up->rendval;
  745. }
  746. /*
  747. * The implementation of semaphores is complicated by needing
  748. * to avoid rescheduling in syssemrelease, so that it is safe
  749. * to call from real-time processes. This means syssemrelease
  750. * cannot acquire any qlocks, only spin locks.
  751. *
  752. * Semacquire and semrelease must both manipulate the semaphore
  753. * wait list. Lock-free linked lists only exist in theory, not
  754. * in practice, so the wait list is protected by a spin lock.
  755. *
  756. * The semaphore value *addr is stored in user memory, so it
  757. * cannot be read or written while holding spin locks.
  758. *
  759. * Thus, we can access the list only when holding the lock, and
  760. * we can access the semaphore only when not holding the lock.
  761. * This makes things interesting. Note that sleep's condition function
  762. * is called while holding two locks - r and up->rlock - so it cannot
  763. * access the semaphore value either.
  764. *
  765. * An acquirer announces its intention to try for the semaphore
  766. * by putting a Sema structure onto the wait list and then
  767. * setting Sema.waiting. After one last check of semaphore,
  768. * the acquirer sleeps until Sema.waiting==0. A releaser of n
  769. * must wake up n acquirers who have Sema.waiting set. It does
  770. * this by clearing Sema.waiting and then calling wakeup.
  771. *
  772. * There are three interesting races here.
  773. * The first is that in this particular sleep/wakeup usage, a single
  774. * wakeup can rouse a process from two consecutive sleeps!
  775. * The ordering is:
  776. *
  777. * (a) set Sema.waiting = 1
  778. * (a) call sleep
  779. * (b) set Sema.waiting = 0
  780. * (a) check Sema.waiting inside sleep, return w/o sleeping
  781. * (a) try for semaphore, fail
  782. * (a) set Sema.waiting = 1
  783. * (a) call sleep
  784. * (b) call wakeup(a)
  785. * (a) wake up again
  786. *
  787. * This is okay - semacquire will just go around the loop
  788. * again. It does mean that at the top of the for(;;) loop in
  789. * semacquire, phore.waiting might already be set to 1.
  790. *
  791. * The second is that a releaser might wake an acquirer who is
  792. * interrupted before he can acquire the lock. Since
  793. * release(n) issues only n wakeup calls -- only n can be used
  794. * anyway -- if the interrupted process is not going to use his
  795. * wakeup call he must pass it on to another acquirer.
  796. *
  797. * The third race is similar to the second but more subtle. An
  798. * acquirer sets waiting=1 and then does a final canacquire()
  799. * before going to sleep. The opposite order would result in
  800. * missing wakeups that happen between canacquire and
  801. * waiting=1. (In fact, the whole point of Sema.waiting is to
  802. * avoid missing wakeups between canacquire() and sleep().) But
  803. * there can be spurious wakeups between a successful
  804. * canacquire() and the following semdequeue(). This wakeup is
  805. * not useful to the acquirer, since he has already acquired
  806. * the semaphore. Like in the previous case, though, the
  807. * acquirer must pass the wakeup call along.
  808. *
  809. * This is all rather subtle. The code below has been verified
  810. * with the spin model /sys/src/9/port/semaphore.p. The
  811. * original code anticipated the second race but not the first
  812. * or third, which were caught only with spin. The first race
  813. * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
  814. * It was lucky that my abstract model of sleep/wakeup still managed
  815. * to preserve that behavior.
  816. *
  817. * I remain slightly concerned about memory coherence
  818. * outside of locks. The spin model does not take
  819. * queued processor writes into account so we have to
  820. * think hard. The only variables accessed outside locks
  821. * are the semaphore value itself and the boolean flag
  822. * Sema.waiting. The value is only accessed with cmpswap,
  823. * whose job description includes doing the right thing as
  824. * far as memory coherence across processors. That leaves
  825. * Sema.waiting. To handle it, we call coherence() before each
  826. * read and after each write. - rsc
  827. */
  828. /* Add semaphore p with addr a to list in seg. */
  829. static void
  830. semqueue(Segment *s, long *a, Sema *p)
  831. {
  832. memset(p, 0, sizeof *p);
  833. p->addr = a;
  834. lock(&s->sema); /* uses s->sema.Rendez.Lock, but no one else is */
  835. p->next = &s->sema;
  836. p->prev = s->sema.prev;
  837. p->next->prev = p;
  838. p->prev->next = p;
  839. unlock(&s->sema);
  840. }
  841. /* Remove semaphore p from list in seg. */
  842. static void
  843. semdequeue(Segment *s, Sema *p)
  844. {
  845. lock(&s->sema);
  846. p->next->prev = p->prev;
  847. p->prev->next = p->next;
  848. unlock(&s->sema);
  849. }
  850. /* Wake up n waiters with addr a on list in seg. */
  851. static void
  852. semwakeup(Segment *s, long *a, long n)
  853. {
  854. Sema *p;
  855. lock(&s->sema);
  856. for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
  857. if(p->addr == a && p->waiting){
  858. p->waiting = 0;
  859. coherence();
  860. wakeup(p);
  861. n--;
  862. }
  863. }
  864. unlock(&s->sema);
  865. }
  866. /* Add delta to semaphore and wake up waiters as appropriate. */
  867. static long
  868. semrelease(Segment *s, long *addr, long delta)
  869. {
  870. long value;
  871. do
  872. value = *addr;
  873. while(!cmpswap(addr, value, value+delta));
  874. semwakeup(s, addr, delta);
  875. return value+delta;
  876. }
  /*
   * Try to acquire semaphore using compare-and-swap.
   * While the value is positive, try to replace it with value-1.
   * Returns 1 once a swap succeeds, 0 as soon as the value is seen
   * to be zero or negative.
   */
  static int
  canacquire(long *addr)
  {
      long value;

      for(;;){
          value = *addr;
          if(value <= 0)
              return 0;
          if(cmpswap(addr, value, value-1))
              return 1;
      }
  }
  887. /* Should we wake up? */
  888. static int
  889. semawoke(void *p)
  890. {
  891. coherence();
  892. return !((Sema*)p)->waiting;
  893. }
  894. /* Acquire semaphore (subtract 1). */
  895. static int
  896. semacquire(Segment *s, long *addr, int block)
  897. {
  898. int acquired;
  899. Sema phore;
  900. if(canacquire(addr))
  901. return 1;
  902. if(!block)
  903. return 0;
  904. acquired = 0;
  905. semqueue(s, addr, &phore);
  906. for(;;){
  907. phore.waiting = 1;
  908. coherence();
  909. if(canacquire(addr)){
  910. acquired = 1;
  911. break;
  912. }
  913. if(waserror())
  914. break;
  915. sleep(&phore, semawoke, &phore);
  916. poperror();
  917. }
  918. semdequeue(s, &phore);
  919. coherence(); /* not strictly necessary due to lock in semdequeue */
  920. if(!phore.waiting)
  921. semwakeup(s, addr, 1);
  922. if(!acquired)
  923. nexterror();
  924. return 1;
  925. }
  926. /* Acquire semaphore or time-out */
  927. static int
  928. tsemacquire(Segment *s, long *addr, ulong ms)
  929. {
  930. int acquired, timedout;
  931. ulong t, elms;
  932. Sema phore;
  933. if(canacquire(addr))
  934. return 1;
  935. if(ms == 0)
  936. return 0;
  937. acquired = timedout = 0;
  938. semqueue(s, addr, &phore);
  939. for(;;){
  940. phore.waiting = 1;
  941. coherence();
  942. if(canacquire(addr)){
  943. acquired = 1;
  944. break;
  945. }
  946. if(waserror())
  947. break;
  948. t = m->ticks;
  949. tsleep(&phore, semawoke, &phore, ms);
  950. elms = TK2MS(m->ticks - t);
  951. poperror();
  952. if(elms >= ms){
  953. timedout = 1;
  954. break;
  955. }
  956. ms -= elms;
  957. }
  958. semdequeue(s, &phore);
  959. coherence(); /* not strictly necessary due to lock in semdequeue */
  960. if(!phore.waiting)
  961. semwakeup(s, addr, 1);
  962. if(timedout)
  963. return 0;
  964. if(!acquired)
  965. nexterror();
  966. return 1;
  967. }
  968. long
  969. syssemacquire(ulong *arg)
  970. {
  971. int block;
  972. long *addr;
  973. Segment *s;
  974. validaddr(arg[0], sizeof(long), 1);
  975. evenaddr(arg[0]);
  976. addr = (long*)arg[0];
  977. block = arg[1];
  978. if((s = seg(up, (ulong)addr, 0)) == nil)
  979. error(Ebadarg);
  980. if(*addr < 0)
  981. error(Ebadarg);
  982. return semacquire(s, addr, block);
  983. }
  984. long
  985. systsemacquire(ulong *arg)
  986. {
  987. long *addr;
  988. ulong ms;
  989. Segment *s;
  990. validaddr(arg[0], sizeof(long), 1);
  991. evenaddr(arg[0]);
  992. addr = (long*)arg[0];
  993. ms = arg[1];
  994. if((s = seg(up, (ulong)addr, 0)) == nil)
  995. error(Ebadarg);
  996. if(*addr < 0)
  997. error(Ebadarg);
  998. return tsemacquire(s, addr, ms);
  999. }
  1000. long
  1001. syssemrelease(ulong *arg)
  1002. {
  1003. long *addr, delta;
  1004. Segment *s;
  1005. validaddr(arg[0], sizeof(long), 1);
  1006. evenaddr(arg[0]);
  1007. addr = (long*)arg[0];
  1008. delta = arg[1];
  1009. if((s = seg(up, (ulong)addr, 0)) == nil)
  1010. error(Ebadarg);
  1011. /* delta == 0 is a no-op, not a release */
  1012. if(delta < 0 || *addr < 0)
  1013. error(Ebadarg);
  1014. return semrelease(s, addr, delta);
  1015. }