/* sysproc.c */
  1. #include "u.h"
  2. #include "tos.h"
  3. #include "../port/lib.h"
  4. #include "mem.h"
  5. #include "dat.h"
  6. #include "fns.h"
  7. #include "../port/error.h"
  8. #include "edf.h"
  9. #include <a.out.h>
/* "#!" line splitter used by sysexec; defined later in this file. */
int shargs(char*, int, char**);
/* Defined outside this file. */
extern void checkpages(void);
extern void checkpagerefs(void);
  13. long
  14. sysr1(ulong*)
  15. {
  16. checkpagerefs();
  17. return 0;
  18. }
  19. long
  20. sysrfork(ulong *arg)
  21. {
  22. Proc *p;
  23. int n, i;
  24. Fgrp *ofg;
  25. Pgrp *opg;
  26. Rgrp *org;
  27. Egrp *oeg;
  28. ulong pid, flag;
  29. Mach *wm;
  30. flag = arg[0];
  31. /* Check flags before we commit */
  32. if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
  33. error(Ebadarg);
  34. if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
  35. error(Ebadarg);
  36. if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
  37. error(Ebadarg);
  38. if((flag&RFPROC) == 0) {
  39. if(flag & (RFMEM|RFNOWAIT))
  40. error(Ebadarg);
  41. if(flag & (RFFDG|RFCFDG)) {
  42. ofg = up->fgrp;
  43. if(flag & RFFDG)
  44. up->fgrp = dupfgrp(ofg);
  45. else
  46. up->fgrp = dupfgrp(nil);
  47. closefgrp(ofg);
  48. }
  49. if(flag & (RFNAMEG|RFCNAMEG)) {
  50. opg = up->pgrp;
  51. up->pgrp = newpgrp();
  52. if(flag & RFNAMEG)
  53. pgrpcpy(up->pgrp, opg);
  54. /* inherit noattach */
  55. up->pgrp->noattach = opg->noattach;
  56. closepgrp(opg);
  57. }
  58. if(flag & RFNOMNT)
  59. up->pgrp->noattach = 1;
  60. if(flag & RFREND) {
  61. org = up->rgrp;
  62. up->rgrp = newrgrp();
  63. closergrp(org);
  64. }
  65. if(flag & (RFENVG|RFCENVG)) {
  66. oeg = up->egrp;
  67. up->egrp = smalloc(sizeof(Egrp));
  68. up->egrp->ref = 1;
  69. if(flag & RFENVG)
  70. envcpy(up->egrp, oeg);
  71. closeegrp(oeg);
  72. }
  73. if(flag & RFNOTEG)
  74. up->noteid = incref(&noteidalloc);
  75. return 0;
  76. }
  77. p = newproc();
  78. p->fpsave = up->fpsave;
  79. p->scallnr = up->scallnr;
  80. p->s = up->s;
  81. p->nerrlab = 0;
  82. p->slash = up->slash;
  83. p->dot = up->dot;
  84. incref(p->dot);
  85. memmove(p->note, up->note, sizeof(p->note));
  86. p->privatemem = up->privatemem;
  87. p->noswap = up->noswap;
  88. p->nnote = up->nnote;
  89. p->notified = 0;
  90. p->lastnote = up->lastnote;
  91. p->notify = up->notify;
  92. p->ureg = up->ureg;
  93. p->dbgreg = 0;
  94. /* Make a new set of memory segments */
  95. n = flag & RFMEM;
  96. qlock(&p->seglock);
  97. if(waserror()){
  98. qunlock(&p->seglock);
  99. nexterror();
  100. }
  101. for(i = 0; i < NSEG; i++)
  102. if(up->seg[i])
  103. p->seg[i] = dupseg(up->seg, i, n);
  104. qunlock(&p->seglock);
  105. poperror();
  106. /* File descriptors */
  107. if(flag & (RFFDG|RFCFDG)) {
  108. if(flag & RFFDG)
  109. p->fgrp = dupfgrp(up->fgrp);
  110. else
  111. p->fgrp = dupfgrp(nil);
  112. }
  113. else {
  114. p->fgrp = up->fgrp;
  115. incref(p->fgrp);
  116. }
  117. /* Process groups */
  118. if(flag & (RFNAMEG|RFCNAMEG)) {
  119. p->pgrp = newpgrp();
  120. if(flag & RFNAMEG)
  121. pgrpcpy(p->pgrp, up->pgrp);
  122. /* inherit noattach */
  123. p->pgrp->noattach = up->pgrp->noattach;
  124. }
  125. else {
  126. p->pgrp = up->pgrp;
  127. incref(p->pgrp);
  128. }
  129. if(flag & RFNOMNT)
  130. up->pgrp->noattach = 1;
  131. if(flag & RFREND)
  132. p->rgrp = newrgrp();
  133. else {
  134. incref(up->rgrp);
  135. p->rgrp = up->rgrp;
  136. }
  137. /* Environment group */
  138. if(flag & (RFENVG|RFCENVG)) {
  139. p->egrp = smalloc(sizeof(Egrp));
  140. p->egrp->ref = 1;
  141. if(flag & RFENVG)
  142. envcpy(p->egrp, up->egrp);
  143. }
  144. else {
  145. p->egrp = up->egrp;
  146. incref(p->egrp);
  147. }
  148. p->hang = up->hang;
  149. p->procmode = up->procmode;
  150. /* Craft a return frame which will cause the child to pop out of
  151. * the scheduler in user mode with the return register zero
  152. */
  153. forkchild(p, up->dbgreg);
  154. p->parent = up;
  155. p->parentpid = up->pid;
  156. if(flag&RFNOWAIT)
  157. p->parentpid = 0;
  158. else {
  159. lock(&up->exl);
  160. up->nchild++;
  161. unlock(&up->exl);
  162. }
  163. if((flag&RFNOTEG) == 0)
  164. p->noteid = up->noteid;
  165. p->fpstate = up->fpstate;
  166. pid = p->pid;
  167. memset(p->time, 0, sizeof(p->time));
  168. p->time[TReal] = MACHP(0)->ticks;
  169. kstrdup(&p->text, up->text);
  170. kstrdup(&p->user, up->user);
  171. /*
  172. * since the bss/data segments are now shareable,
  173. * any mmu info about this process is now stale
  174. * (i.e. has bad properties) and has to be discarded.
  175. */
  176. flushmmu();
  177. p->basepri = up->basepri;
  178. p->priority = up->basepri;
  179. p->fixedpri = up->fixedpri;
  180. p->mp = up->mp;
  181. wm = up->wired;
  182. if(wm)
  183. procwired(p, wm->machno);
  184. ready(p);
  185. sched();
  186. return pid;
  187. }
  188. static ulong
  189. l2be(long l)
  190. {
  191. uchar *cp;
  192. cp = (uchar*)&l;
  193. return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
  194. }
/*
 * exec system call: replace the calling process's image with the
 * program named by arg[0], passing the argument vector at arg[1].
 *
 * The file is opened and its header read.  If it is not an a.out
 * image with AOUT_MAGIC it may instead be a "#!" script, in which
 * case the interpreter named on its first line is opened (one level
 * only) and the words of that line are placed ahead of the caller's
 * arguments.  A temporary stack segment is built in ESEG holding the
 * Tos structure and the copied arguments; the old SSEG..BSEG segments
 * and any SG_CEXEC segments are released; new text, data and bss
 * segments are installed; and the new stack is moved into SSEG.
 * Returns the value of execregs(entry, ssize, nargs).
 *
 * Errors: Ebadexec for an unusable header or script line, or an image
 * reaching KZERO; Enovmem if the arguments need more than TSTKSIZ
 * pages of stack.
 */
long
sysexec(ulong *arg)
{
	Segment *s, *ts;
	ulong t, d, b;
	int i;
	Chan *tc;
	char **argv, **argp;
	char *a, *charp, *args, *file;
	char *progarg[sizeof(Exec)/2+1], *elem, progelem[64];
	ulong ssize, spage, nargs, nbytes, n, bssend;
	int indir;
	Exec exec;
	char line[sizeof(Exec)];
	Fgrp *f;
	Image *img;
	ulong magic, text, entry, data, bss;
	Tos *tos;

	validaddr(arg[0], 1, 0);
	file = (char*)arg[0];
	indir = 0;
	elem = nil;
	/* error label 1: releases elem until it is handed to up->text below */
	if(waserror()){
		free(elem);
		nexterror();
	}
	/* at most two passes: the named file, then a "#!" interpreter */
	for(;;){
		tc = namec(file, Aopen, OEXEC, 0);
		/* error label 2: closes tc; left in place when the loop breaks */
		if(waserror()){
			cclose(tc);
			nexterror();
		}
		/* on the first pass only, save a copy of up->genbuf in elem */
		if(!indir)
			kstrdup(&elem, up->genbuf);

		n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
		if(n < 2)
			error(Ebadexec);
		magic = l2be(exec.magic);
		text = l2be(exec.text);
		entry = l2be(exec.entry);
		if(n==sizeof(Exec) && (magic == AOUT_MAGIC)){
			/* entry must lie within the text that follows the header */
			if(text >= USTKTOP-UTZERO
			|| entry < UTZERO+sizeof(Exec)
			|| entry >= UTZERO+sizeof(Exec)+text)
				error(Ebadexec);
			break; /* for binary */
		}

		/*
		 * Process #! /bin/sh args ...
		 */
		memmove(line, &exec, sizeof(Exec));
		/* indir already set means the interpreter is itself not a binary */
		if(indir || line[0]!='#' || line[1]!='!')
			error(Ebadexec);
		n = shargs(line, n, progarg);
		if(n == 0)
			error(Ebadexec);
		indir = 1;
		/*
		 * First arg becomes complete file name
		 */
		progarg[n++] = file;
		progarg[n] = 0;
		/* step past the caller's argv[0]; progarg supplies the leading words */
		validaddr(arg[1], BY2WD, 1);
		arg[1] += BY2WD;
		file = progarg[0];
		if(strlen(elem) >= sizeof progelem)
			error(Ebadexec);
		strcpy(progelem, elem);
		progarg[0] = progelem;
		poperror();
		cclose(tc);
	}

	data = l2be(exec.data);
	bss = l2be(exec.bss);
	/* page-rounded ends of text (t), data (d) and bss (b) */
	t = (UTZERO+sizeof(Exec)+text+(BY2PG-1)) & ~(BY2PG-1);
	d = (t + data + (BY2PG-1)) & ~(BY2PG-1);
	bssend = t + data + bss;
	b = (bssend + (BY2PG-1)) & ~(BY2PG-1);
	if(t >= KZERO || d >= KZERO || b >= KZERO)
		error(Ebadexec);

	/*
	 * Args: pass 1: count
	 */
	nbytes = sizeof(Tos); /* hole for profiling clock at top of stack (and more) */
	nargs = 0;
	if(indir){
		argp = progarg;
		while(*argp){
			a = *argp++;
			nbytes += strlen(a) + 1;
			nargs++;
		}
	}
	evenaddr(arg[1]);
	argp = (char**)arg[1];
	validaddr((ulong)argp, BY2WD, 0);
	while(*argp){
		a = *argp++;
		/* argp has just moved onto a new page: validate that page too */
		if(((ulong)argp&(BY2PG-1)) < BY2WD)
			validaddr((ulong)argp, BY2WD, 0);
		validaddr((ulong)a, 1, 0);
		nbytes += ((char*)vmemchr(a, 0, 0x7FFFFFFF) - a) + 1;
		nargs++;
	}
	/* argv pointers plus a terminator, then the word-rounded string bytes */
	ssize = BY2WD*(nargs+1) + ((nbytes+(BY2WD-1)) & ~(BY2WD-1));

	/*
	 * 8-byte align SP for those (e.g. sparc) that need it.
	 * execregs() will subtract another 4 bytes for argc.
	 */
	if((ssize+4) & 7)
		ssize += 4;
	spage = (ssize+(BY2PG-1)) >> PGSHIFT;

	/*
	 * Build the stack segment, putting it in kernel virtual for the moment
	 */
	if(spage > TSTKSIZ)
		error(Enovmem);

	qlock(&up->seglock);
	/* error label 3: drops up->seglock */
	if(waserror()){
		qunlock(&up->seglock);
		nexterror();
	}
	up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG);

	/*
	 * Args: pass 2: assemble; the pages will be faulted in
	 */
	tos = (Tos*)(TSTKTOP - sizeof(Tos));
	tos->cyclefreq = m->cyclefreq;
	cycles((uvlong*)&tos->pcycles);
	tos->pcycles = -tos->pcycles;
	tos->kcycles = tos->pcycles;
	tos->clock = 0;
	argv = (char**)(TSTKTOP - ssize);
	charp = (char*)(TSTKTOP - nbytes);
	args = charp;
	if(indir)
		argp = progarg;
	else
		argp = (char**)arg[1];

	for(i=0; i<nargs; i++){
		/* progarg exhausted: continue with the caller's argument vector */
		if(indir && *argp==0) {
			indir = 0;
			argp = (char**)arg[1];
		}
		/* stored pointers are rebased from TSTKTOP to USTKTOP */
		*argv++ = charp + (USTKTOP-TSTKTOP);
		n = strlen(*argp) + 1;
		memmove(charp, *argp++, n);
		charp += n;
	}

	free(up->text);
	up->text = elem;
	elem = nil; /* so waserror() won't free elem */
	USED(elem);

	/* copy args; easiest from new process's stack */
	n = charp - args;
	if(n > 128) /* don't waste too much space on huge arg lists */
		n = 128;
	a = up->args;
	up->args = nil;
	free(a);
	up->args = smalloc(n);
	memmove(up->args, args, n);
	if(n>0 && up->args[n-1]!='\0'){
		/* make sure last arg is NUL-terminated */
		/* put NUL at UTF-8 character boundary */
		for(i=n-1; i>0; --i)
			if(fullrune(up->args+i, n-i))
				break;
		up->args[i] = 0;
		n = i+1;
	}
	up->nargs = n;

	/*
	 * Committed.
	 * Free old memory.
	 * Special segments are maintained across exec
	 */
	for(i = SSEG; i <= BSEG; i++) {
		putseg(up->seg[i]);
		/* prevent a second free if we have an error */
		up->seg[i] = 0;
	}
	for(i = BSEG+1; i < NSEG; i++) {
		s = up->seg[i];
		if(s != 0 && (s->type&SG_CEXEC)) {
			putseg(s);
			up->seg[i] = 0;
		}
	}

	/*
	 * Close on exec
	 */
	f = up->fgrp;
	for(i=0; i<=f->maxfd; i++)
		fdclose(i, CCEXEC);

	/* Text. Shared. Attaches to cache image if possible */
	/* attachimage returns a locked cache image */
	img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (t-UTZERO)>>PGSHIFT);
	ts = img->s;
	up->seg[TSEG] = ts;
	ts->flushme = 1;
	ts->fstart = 0;
	ts->flen = sizeof(Exec)+text;
	unlock(img);

	/* Data. Shared. */
	s = newseg(SG_DATA, t, (d-t)>>PGSHIFT);
	up->seg[DSEG] = s;

	/* Attached by hand */
	incref(img);
	s->image = img;
	/* the data bytes follow the header and text in the file */
	s->fstart = ts->fstart+ts->flen;
	s->flen = data;

	/* BSS. Zero fill on demand */
	up->seg[BSEG] = newseg(SG_BSS, d, (b-d)>>PGSHIFT);

	/*
	 * Move the stack
	 */
	s = up->seg[ESEG];
	up->seg[ESEG] = 0;
	up->seg[SSEG] = s;
	qunlock(&up->seglock);
	poperror(); /* seglock */
	poperror(); /* elem */
	s->base = USTKTOP-USTKSIZE;
	s->top = USTKTOP;
	relocateseg(s, USTKTOP-TSTKTOP);

	/*
	 * '/' processes are higher priority (hack to make /ip more responsive).
	 */
	if(devtab[tc->type]->dc == L'/')
		up->basepri = PriRoot;
	up->priority = up->basepri;
	/* pops the tc label left in place when the open loop broke out */
	poperror();
	cclose(tc);

	/*
	 * At this point, the mmu contains info about the old address
	 * space and needs to be flushed
	 */
	flushmmu();
	/* reset note state and private-memory flag under up->debug */
	qlock(&up->debug);
	up->nnote = 0;
	up->notify = 0;
	up->notified = 0;
	up->privatemem = 0;
	procsetup(up);
	qunlock(&up->debug);
	if(up->hang)
		up->procctl = Proc_stopme;

	return execregs(entry, ssize, nargs);
}
  445. int
  446. shargs(char *s, int n, char **ap)
  447. {
  448. int i;
  449. s += 2;
  450. n -= 2; /* skip #! */
  451. for(i=0; s[i]!='\n'; i++)
  452. if(i == n-1)
  453. return 0;
  454. s[i] = 0;
  455. *ap = 0;
  456. i = 0;
  457. for(;;) {
  458. while(*s==' ' || *s=='\t')
  459. s++;
  460. if(*s == 0)
  461. break;
  462. i++;
  463. *ap++ = s;
  464. *ap = 0;
  465. while(*s && *s!=' ' && *s!='\t')
  466. s++;
  467. if(*s == 0)
  468. break;
  469. else
  470. *s++ = 0;
  471. }
  472. return i;
  473. }
  474. int
  475. return0(void*)
  476. {
  477. return 0;
  478. }
  479. long
  480. syssleep(ulong *arg)
  481. {
  482. int n;
  483. n = arg[0];
  484. if(n <= 0) {
  485. if (up->edf && (up->edf->flags & Admitted))
  486. edfyield();
  487. else
  488. yield();
  489. return 0;
  490. }
  491. if(n < TK2MS(1))
  492. n = TK2MS(1);
  493. tsleep(&up->sleep, return0, 0, n);
  494. return 0;
  495. }
  496. long
  497. sysalarm(ulong *arg)
  498. {
  499. return procalarm(arg[0]);
  500. }
  501. long
  502. sysexits(ulong *arg)
  503. {
  504. char *status;
  505. char *inval = "invalid exit string";
  506. char buf[ERRMAX];
  507. status = (char*)arg[0];
  508. if(status){
  509. if(waserror())
  510. status = inval;
  511. else{
  512. validaddr((ulong)status, 1, 0);
  513. if(vmemchr(status, 0, ERRMAX) == 0){
  514. memmove(buf, status, ERRMAX);
  515. buf[ERRMAX-1] = 0;
  516. status = buf;
  517. }
  518. poperror();
  519. }
  520. }
  521. pexit(status, 1);
  522. return 0; /* not reached */
  523. }
  524. long
  525. sys_wait(ulong *arg)
  526. {
  527. int pid;
  528. Waitmsg w;
  529. OWaitmsg *ow;
  530. if(arg[0] == 0)
  531. return pwait(nil);
  532. validaddr(arg[0], sizeof(OWaitmsg), 1);
  533. evenaddr(arg[0]);
  534. pid = pwait(&w);
  535. if(pid >= 0){
  536. ow = (OWaitmsg*)arg[0];
  537. readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
  538. readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
  539. readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
  540. readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
  541. strncpy(ow->msg, w.msg, sizeof(ow->msg));
  542. ow->msg[sizeof(ow->msg)-1] = '\0';
  543. }
  544. return pid;
  545. }
  546. long
  547. sysawait(ulong *arg)
  548. {
  549. int i;
  550. int pid;
  551. Waitmsg w;
  552. ulong n;
  553. n = arg[1];
  554. validaddr(arg[0], n, 1);
  555. pid = pwait(&w);
  556. if(pid < 0)
  557. return -1;
  558. i = snprint((char*)arg[0], n, "%d %lud %lud %lud %q",
  559. w.pid,
  560. w.time[TUser], w.time[TSys], w.time[TReal],
  561. w.msg);
  562. return i;
  563. }
  564. long
  565. sysdeath(ulong*)
  566. {
  567. pprint("deprecated system call\n");
  568. pexit("Suicide", 0);
  569. return 0; /* not reached */
  570. }
  571. void
  572. werrstr(char *fmt, ...)
  573. {
  574. va_list va;
  575. if(up == nil)
  576. return;
  577. va_start(va, fmt);
  578. vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
  579. va_end(va);
  580. }
  581. static long
  582. generrstr(char *buf, uint nbuf)
  583. {
  584. char tmp[ERRMAX];
  585. if(nbuf == 0)
  586. error(Ebadarg);
  587. validaddr((ulong)buf, nbuf, 1);
  588. if(nbuf > sizeof tmp)
  589. nbuf = sizeof tmp;
  590. memmove(tmp, buf, nbuf);
  591. /* make sure it's NUL-terminated */
  592. tmp[nbuf-1] = '\0';
  593. memmove(buf, up->syserrstr, nbuf);
  594. buf[nbuf-1] = '\0';
  595. memmove(up->syserrstr, tmp, nbuf);
  596. return 0;
  597. }
  598. long
  599. syserrstr(ulong *arg)
  600. {
  601. return generrstr((char*)arg[0], arg[1]);
  602. }
  603. /* compatibility for old binaries */
  604. long
  605. sys_errstr(ulong *arg)
  606. {
  607. return generrstr((char*)arg[0], 64);
  608. }
  609. long
  610. sysnotify(ulong *arg)
  611. {
  612. if(arg[0] != 0)
  613. validaddr(arg[0], sizeof(ulong), 0);
  614. up->notify = (int(*)(void*, char*))(arg[0]);
  615. return 0;
  616. }
  617. long
  618. sysnoted(ulong *arg)
  619. {
  620. if(arg[0]!=NRSTR && !up->notified)
  621. error(Egreg);
  622. return 0;
  623. }
  624. long
  625. syssegbrk(ulong *arg)
  626. {
  627. int i;
  628. ulong addr;
  629. Segment *s;
  630. addr = arg[0];
  631. for(i = 0; i < NSEG; i++) {
  632. s = up->seg[i];
  633. if(s == 0 || addr < s->base || addr >= s->top)
  634. continue;
  635. switch(s->type&SG_TYPE) {
  636. case SG_TEXT:
  637. case SG_DATA:
  638. case SG_STACK:
  639. error(Ebadarg);
  640. default:
  641. return ibrk(arg[1], i);
  642. }
  643. }
  644. error(Ebadarg);
  645. return 0; /* not reached */
  646. }
  647. long
  648. syssegattach(ulong *arg)
  649. {
  650. return segattach(up, arg[0], (char*)arg[1], arg[2], arg[3]);
  651. }
  652. long
  653. syssegdetach(ulong *arg)
  654. {
  655. int i;
  656. ulong addr;
  657. Segment *s;
  658. qlock(&up->seglock);
  659. if(waserror()){
  660. qunlock(&up->seglock);
  661. nexterror();
  662. }
  663. s = 0;
  664. addr = arg[0];
  665. for(i = 0; i < NSEG; i++)
  666. if(s = up->seg[i]) {
  667. qlock(&s->lk);
  668. if((addr >= s->base && addr < s->top) ||
  669. (s->top == s->base && addr == s->base))
  670. goto found;
  671. qunlock(&s->lk);
  672. }
  673. error(Ebadarg);
  674. found:
  675. /*
  676. * Check we are not detaching the initial stack segment.
  677. */
  678. if(s == up->seg[SSEG]){
  679. qunlock(&s->lk);
  680. error(Ebadarg);
  681. }
  682. up->seg[i] = 0;
  683. qunlock(&s->lk);
  684. putseg(s);
  685. qunlock(&up->seglock);
  686. poperror();
  687. /* Ensure we flush any entries from the lost segment */
  688. flushmmu();
  689. return 0;
  690. }
  691. long
  692. syssegfree(ulong *arg)
  693. {
  694. Segment *s;
  695. ulong from, to;
  696. from = arg[0];
  697. s = seg(up, from, 1);
  698. if(s == nil)
  699. error(Ebadarg);
  700. to = (from + arg[1]) & ~(BY2PG-1);
  701. from = PGROUND(from);
  702. if(to > s->top) {
  703. qunlock(&s->lk);
  704. error(Ebadarg);
  705. }
  706. mfreeseg(s, from, (to - from) / BY2PG);
  707. qunlock(&s->lk);
  708. flushmmu();
  709. return 0;
  710. }
  711. /* For binary compatibility */
  712. long
  713. sysbrk_(ulong *arg)
  714. {
  715. return ibrk(arg[0], BSEG);
  716. }
  717. long
  718. sysrendezvous(ulong *arg)
  719. {
  720. uintptr tag, val;
  721. Proc *p, **l;
  722. tag = arg[0];
  723. l = &REND(up->rgrp, tag);
  724. up->rendval = ~(uintptr)0;
  725. lock(up->rgrp);
  726. for(p = *l; p; p = p->rendhash) {
  727. if(p->rendtag == tag) {
  728. *l = p->rendhash;
  729. val = p->rendval;
  730. p->rendval = arg[1];
  731. while(p->mach != 0)
  732. ;
  733. ready(p);
  734. unlock(up->rgrp);
  735. return val;
  736. }
  737. l = &p->rendhash;
  738. }
  739. /* Going to sleep here */
  740. up->rendtag = tag;
  741. up->rendval = arg[1];
  742. up->rendhash = *l;
  743. *l = up;
  744. up->state = Rendezvous;
  745. unlock(up->rgrp);
  746. sched();
  747. return up->rendval;
  748. }
  749. /*
  750. * The implementation of semaphores is complicated by needing
  751. * to avoid rescheduling in syssemrelease, so that it is safe
  752. * to call from real-time processes. This means syssemrelease
  753. * cannot acquire any qlocks, only spin locks.
  754. *
  755. * Semacquire and semrelease must both manipulate the semaphore
  756. * wait list. Lock-free linked lists only exist in theory, not
  757. * in practice, so the wait list is protected by a spin lock.
  758. *
  759. * The semaphore value *addr is stored in user memory, so it
  760. * cannot be read or written while holding spin locks.
  761. *
  762. * Thus, we can access the list only when holding the lock, and
  763. * we can access the semaphore only when not holding the lock.
  764. * This makes things interesting. Note that sleep's condition function
  765. * is called while holding two locks - r and up->rlock - so it cannot
  766. * access the semaphore value either.
  767. *
  768. * An acquirer announces its intention to try for the semaphore
  769. * by putting a Sema structure onto the wait list and then
  770. * setting Sema.waiting. After one last check of semaphore,
  771. * the acquirer sleeps until Sema.waiting==0. A releaser of n
  772. * must wake up n acquirers who have Sema.waiting set. It does
  773. * this by clearing Sema.waiting and then calling wakeup.
  774. *
  775. * There are three interesting races here.
  776. * The first is that in this particular sleep/wakeup usage, a single
  777. * wakeup can rouse a process from two consecutive sleeps!
  778. * The ordering is:
  779. *
  780. * (a) set Sema.waiting = 1
  781. * (a) call sleep
  782. * (b) set Sema.waiting = 0
  783. * (a) check Sema.waiting inside sleep, return w/o sleeping
  784. * (a) try for semaphore, fail
  785. * (a) set Sema.waiting = 1
  786. * (a) call sleep
  787. * (b) call wakeup(a)
  788. * (a) wake up again
  789. *
  790. * This is okay - semacquire will just go around the loop
  791. * again. It does mean that at the top of the for(;;) loop in
  792. * semacquire, phore.waiting might already be set to 1.
  793. *
  794. * The second is that a releaser might wake an acquirer who is
  795. * interrupted before he can acquire the lock. Since
  796. * release(n) issues only n wakeup calls -- only n can be used
  797. * anyway -- if the interrupted process is not going to use his
  798. * wakeup call he must pass it on to another acquirer.
  799. *
  800. * The third race is similar to the second but more subtle. An
  801. * acquirer sets waiting=1 and then does a final canacquire()
  802. * before going to sleep. The opposite order would result in
  803. * missing wakeups that happen between canacquire and
  804. * waiting=1. (In fact, the whole point of Sema.waiting is to
  805. * avoid missing wakeups between canacquire() and sleep().) But
  806. * there can be spurious wakeups between a successful
  807. * canacquire() and the following semdequeue(). This wakeup is
  808. * not useful to the acquirer, since he has already acquired
  809. * the semaphore. Like in the previous case, though, the
  810. * acquirer must pass the wakeup call along.
  811. *
  812. * This is all rather subtle. The code below has been verified
  813. * with the spin model /sys/src/9/port/semaphore.p. The
  814. * original code anticipated the second race but not the first
  815. * or third, which were caught only with spin. The first race
  816. * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
  817. * It was lucky that my abstract model of sleep/wakeup still managed
  818. * to preserve that behavior.
  819. *
  820. * I remain slightly concerned about memory coherence
  821. * outside of locks. The spin model does not take
  822. * queued processor writes into account so we have to
  823. * think hard. The only variables accessed outside locks
  824. * are the semaphore value itself and the boolean flag
  825. * Sema.waiting. The value is only accessed with cmpswap,
  826. * whose job description includes doing the right thing as
  827. * far as memory coherence across processors. That leaves
  828. * Sema.waiting. To handle it, we call coherence() before each
  829. * read and after each write. - rsc
  830. */
  831. /* Add semaphore p with addr a to list in seg. */
  832. static void
  833. semqueue(Segment *s, long *a, Sema *p)
  834. {
  835. memset(p, 0, sizeof *p);
  836. p->addr = a;
  837. lock(&s->sema); /* uses s->sema.Rendez.Lock, but no one else is */
  838. p->next = &s->sema;
  839. p->prev = s->sema.prev;
  840. p->next->prev = p;
  841. p->prev->next = p;
  842. unlock(&s->sema);
  843. }
  844. /* Remove semaphore p from list in seg. */
  845. static void
  846. semdequeue(Segment *s, Sema *p)
  847. {
  848. lock(&s->sema);
  849. p->next->prev = p->prev;
  850. p->prev->next = p->next;
  851. unlock(&s->sema);
  852. }
  853. /* Wake up n waiters with addr a on list in seg. */
  854. static void
  855. semwakeup(Segment *s, long *a, long n)
  856. {
  857. Sema *p;
  858. lock(&s->sema);
  859. for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
  860. if(p->addr == a && p->waiting){
  861. p->waiting = 0;
  862. coherence();
  863. wakeup(p);
  864. n--;
  865. }
  866. }
  867. unlock(&s->sema);
  868. }
  869. /* Add delta to semaphore and wake up waiters as appropriate. */
  870. static long
  871. semrelease(Segment *s, long *addr, long delta)
  872. {
  873. long value;
  874. do
  875. value = *addr;
  876. while(!cmpswap(addr, value, value+delta));
  877. semwakeup(s, addr, delta);
  878. return value+delta;
  879. }
  880. /* Try to acquire semaphore using compare-and-swap */
  881. static int
  882. canacquire(long *addr)
  883. {
  884. long value;
  885. while((value=*addr) > 0)
  886. if(cmpswap(addr, value, value-1))
  887. return 1;
  888. return 0;
  889. }
  890. /* Should we wake up? */
  891. static int
  892. semawoke(void *p)
  893. {
  894. coherence();
  895. return !((Sema*)p)->waiting;
  896. }
/*
 * Acquire semaphore (subtract 1).
 *
 * Returns 1 when the semaphore was taken.  Returns 0 only when the
 * first attempt fails and block is zero.  When blocking, a Sema is
 * queued on segment s and the caller loops: set waiting, try again,
 * sleep until waiting is cleared.  An error raised while sleeping is
 * re-raised after the Sema has been taken off the list.
 */
static int
semacquire(Segment *s, long *addr, int block)
{
	int acquired;
	Sema phore;

	/* fast path: no queueing needed */
	if(canacquire(addr))
		return 1;
	if(!block)
		return 0;

	acquired = 0;
	semqueue(s, addr, &phore);
	for(;;){
		/* announce that we are waiting, then make one last attempt */
		phore.waiting = 1;
		coherence();
		if(canacquire(addr)){
			acquired = 1;
			break;
		}
		/* an error during sleep() leaves the loop with acquired == 0 */
		if(waserror())
			break;
		sleep(&phore, semawoke, &phore);
		poperror();
	}
	semdequeue(s, &phore);
	coherence();	/* not strictly necessary due to lock in semdequeue */
	/* waiting was cleared by a releaser: pass that wakeup on */
	if(!phore.waiting)
		semwakeup(s, addr, 1);
	/* the loop was left through waserror(): re-raise the error */
	if(!acquired)
		nexterror();
	return 1;
}
  929. long
  930. syssemacquire(ulong *arg)
  931. {
  932. int block;
  933. long *addr;
  934. Segment *s;
  935. validaddr(arg[0], sizeof(long), 1);
  936. evenaddr(arg[0]);
  937. addr = (long*)arg[0];
  938. block = arg[1];
  939. if((s = seg(up, (ulong)addr, 0)) == nil)
  940. error(Ebadarg);
  941. if(*addr < 0)
  942. error(Ebadarg);
  943. return semacquire(s, addr, block);
  944. }
  945. long
  946. syssemrelease(ulong *arg)
  947. {
  948. long *addr, delta;
  949. Segment *s;
  950. validaddr(arg[0], sizeof(long), 1);
  951. evenaddr(arg[0]);
  952. addr = (long*)arg[0];
  953. delta = arg[1];
  954. if((s = seg(up, (ulong)addr, 0)) == nil)
  955. error(Ebadarg);
  956. if(delta < 0 || *addr < 0)
  957. error(Ebadarg);
  958. return semrelease(s, addr, arg[1]);
  959. }