sysproc.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include "u.h"
  10. #include "tos.h"
  11. #include "../port/lib.h"
  12. #include "mem.h"
  13. #include "dat.h"
  14. #include "fns.h"
  15. #include "../port/error.h"
  16. #include "../port/edf.h"
  17. #include <trace.h>
  18. #undef DBG
  19. #define DBG if(0)print
  20. void
  21. sysrfork(Ar0* ar0, ...)
  22. {
  23. Proc *up = externup();
  24. Proc *p;
  25. int flag, i, n, pid;
  26. Fgrp *ofg;
  27. Pgrp *opg;
  28. Rgrp *org;
  29. Egrp *oeg;
  30. Mach *wm;
  31. va_list list;
  32. va_start(list, ar0);
  33. /*
  34. * int rfork(int);
  35. */
  36. flag = va_arg(list, int);
  37. va_end(list);
  38. /* Check flags before we commit */
  39. if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
  40. error(Ebadarg);
  41. if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
  42. error(Ebadarg);
  43. if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
  44. error(Ebadarg);
  45. if((flag & (RFPREPAGE|RFCPREPAGE)) == (RFPREPAGE|RFCPREPAGE))
  46. error(Ebadarg);
  47. if((flag & (RFCORE|RFCCORE)) == (RFCORE|RFCCORE))
  48. error(Ebadarg);
  49. if(flag & RFCORE && up->wired != nil)
  50. error("wired proc cannot move to ac");
  51. if((flag&RFPROC) == 0) {
  52. if(flag & (RFMEM|RFNOWAIT))
  53. error(Ebadarg);
  54. if(flag & (RFFDG|RFCFDG)) {
  55. ofg = up->fgrp;
  56. if(flag & RFFDG)
  57. up->fgrp = dupfgrp(ofg);
  58. else
  59. up->fgrp = dupfgrp(nil);
  60. closefgrp(ofg);
  61. }
  62. if(flag & (RFNAMEG|RFCNAMEG)) {
  63. opg = up->pgrp;
  64. up->pgrp = newpgrp();
  65. if(flag & RFNAMEG)
  66. pgrpcpy(up->pgrp, opg);
  67. /* inherit noattach */
  68. up->pgrp->noattach = opg->noattach;
  69. closepgrp(opg);
  70. }
  71. if(flag & RFNOMNT)
  72. up->pgrp->noattach = 1;
  73. if(flag & RFREND) {
  74. org = up->rgrp;
  75. up->rgrp = newrgrp();
  76. closergrp(org);
  77. }
  78. if(flag & (RFENVG|RFCENVG)) {
  79. oeg = up->egrp;
  80. up->egrp = smalloc(sizeof(Egrp));
  81. up->egrp->r.ref = 1;
  82. if(flag & RFENVG)
  83. envcpy(up->egrp, oeg);
  84. closeegrp(oeg);
  85. }
  86. if(flag & RFNOTEG)
  87. up->noteid = incref(&noteidalloc);
  88. if(flag & (RFPREPAGE|RFCPREPAGE)){
  89. up->prepagemem = flag&RFPREPAGE;
  90. nixprepage(-1);
  91. }
  92. if(flag & RFCORE){
  93. up->ac = getac(up, -1);
  94. up->procctl = Proc_toac;
  95. }else if(flag & RFCCORE){
  96. if(up->ac != nil)
  97. up->procctl = Proc_totc;
  98. }
  99. ar0->i = 0;
  100. return;
  101. }
  102. p = newproc();
  103. if(flag & RFCORE){
  104. if(!waserror()){
  105. p->ac = getac(p, -1);
  106. p->procctl = Proc_toac;
  107. poperror();
  108. }else{
  109. print("warning: rfork: no available ac for the child, it runs in the tc\n");
  110. p->procctl = 0;
  111. }
  112. }
  113. if(up->trace)
  114. p->trace = 1;
  115. p->scallnr = up->scallnr;
  116. memmove(p->arg, up->arg, sizeof(up->arg));
  117. p->nerrlab = 0;
  118. p->slash = up->slash;
  119. p->dot = up->dot;
  120. incref(&p->dot->r);
  121. memmove(p->note, up->note, sizeof(p->note));
  122. p->privatemem = up->privatemem;
  123. p->noswap = up->noswap;
  124. p->nnote = up->nnote;
  125. p->notified = 0;
  126. p->lastnote = up->lastnote;
  127. p->notify = up->notify;
  128. p->ureg = up->ureg;
  129. p->prepagemem = up->prepagemem;
  130. p->dbgreg = 0;
  131. /* Make a new set of memory segments */
  132. n = flag & RFMEM;
  133. qlock(&p->seglock);
  134. if(waserror()){
  135. qunlock(&p->seglock);
  136. nexterror();
  137. }
  138. for(i = 0; i < NSEG; i++)
  139. if(up->seg[i])
  140. p->seg[i] = dupseg(up->seg, i, n);
  141. qunlock(&p->seglock);
  142. poperror();
  143. /* File descriptors */
  144. if(flag & (RFFDG|RFCFDG)) {
  145. if(flag & RFFDG)
  146. p->fgrp = dupfgrp(up->fgrp);
  147. else
  148. p->fgrp = dupfgrp(nil);
  149. }
  150. else {
  151. p->fgrp = up->fgrp;
  152. incref(&p->fgrp->r);
  153. }
  154. /* Process groups */
  155. if(flag & (RFNAMEG|RFCNAMEG)) {
  156. p->pgrp = newpgrp();
  157. if(flag & RFNAMEG)
  158. pgrpcpy(p->pgrp, up->pgrp);
  159. /* inherit noattach */
  160. p->pgrp->noattach = up->pgrp->noattach;
  161. }
  162. else {
  163. p->pgrp = up->pgrp;
  164. incref(&p->pgrp->r);
  165. }
  166. if(flag & RFNOMNT)
  167. up->pgrp->noattach = 1;
  168. if(flag & RFREND)
  169. p->rgrp = newrgrp();
  170. else {
  171. incref(&up->rgrp->r);
  172. p->rgrp = up->rgrp;
  173. }
  174. /* Environment group */
  175. if(flag & (RFENVG|RFCENVG)) {
  176. p->egrp = smalloc(sizeof(Egrp));
  177. p->egrp->r.ref = 1;
  178. if(flag & RFENVG)
  179. envcpy(p->egrp, up->egrp);
  180. }
  181. else {
  182. p->egrp = up->egrp;
  183. incref(&p->egrp->r);
  184. }
  185. p->hang = up->hang;
  186. p->procmode = up->procmode;
  187. /* Craft a return frame which will cause the child to pop out of
  188. * the scheduler in user mode with the return register zero
  189. */
  190. sysrforkchild(p, up);
  191. p->parent = up;
  192. p->parentpid = up->pid;
  193. if(flag&RFNOWAIT)
  194. p->parentpid = 0;
  195. else {
  196. lock(&up->exl);
  197. up->nchild++;
  198. unlock(&up->exl);
  199. }
  200. if((flag&RFNOTEG) == 0)
  201. p->noteid = up->noteid;
  202. pid = p->pid;
  203. memset(p->time, 0, sizeof(p->time));
  204. p->time[TReal] = sys->ticks;
  205. if(flag & (RFPREPAGE|RFCPREPAGE)){
  206. p->prepagemem = flag&RFPREPAGE;
  207. /*
  208. * BUG: this is prepaging our memory, not
  209. * that of the child, but at least we
  210. * will do the copy on write.
  211. */
  212. nixprepage(-1);
  213. }
  214. kstrdup(&p->text, up->text);
  215. kstrdup(&p->user, up->user);
  216. /*
  217. * since the bss/data segments are now shareable,
  218. * any mmu info about this process is now stale
  219. * (i.e. has bad properties) and has to be discarded.
  220. */
  221. mmuflush();
  222. p->basepri = up->basepri;
  223. p->priority = up->basepri;
  224. p->fixedpri = up->fixedpri;
  225. p->mp = up->mp;
  226. wm = up->wired;
  227. if(wm)
  228. procwired(p, wm->machno);
  229. p->color = up->color;
  230. ready(p);
  231. sched();
  232. ar0->i = pid;
  233. }
  234. #if 0
  /*
   * vl2be - read the eight bytes of v, in memory order, as a big-endian
   * 64-bit quantity and return that value.
   *
   * Every byte is widened to uint64_t before it is shifted.  Shifting the
   * promoted int instead overflows for bytes >= 0x80 and, for the low
   * word, sign-extends into the high word of the result.
   */
  static uint64_t
  vl2be(uint64_t v)
  {
      uint8_t *p;
      uint64_t hi, lo;

      p = (uint8_t*)&v;
      hi = ((uint64_t)p[0]<<24) | ((uint64_t)p[1]<<16) | ((uint64_t)p[2]<<8) | (uint64_t)p[3];
      lo = ((uint64_t)p[4]<<24) | ((uint64_t)p[5]<<16) | ((uint64_t)p[6]<<8) | (uint64_t)p[7];
      return (hi<<32) | lo;
  }
  /*
   * l2be - read the four bytes of l, in memory order, as a big-endian
   * 32-bit quantity and return that value.
   *
   * The bytes are widened to uint32_t before shifting so that a leading
   * byte >= 0x80 does not overflow a signed int.
   */
  static uint32_t
  l2be(int32_t l)
  {
      uint8_t *cp;

      cp = (uint8_t*)&l;
      return ((uint32_t)cp[0]<<24) | ((uint32_t)cp[1]<<16) | ((uint32_t)cp[2]<<8) | (uint32_t)cp[3];
  }
  250. #endif
  251. /*
  252. * flags can ONLY specify that you want an AC for you, or
  253. * that you want an XC for you.
  254. */
  255. static void
  256. execac(Ar0* ar0, int flags, char *ufile, char **argv)
  257. {
  258. Proc *up = externup();
  259. Fgrp *fg;
  260. Tos *tos;
  261. Chan *chan, *ichan;
  262. Image *img;
  263. Segment *s;
  264. Ldseg *ldseg;
  265. int argc, i, n, nldseg;
  266. char *a, *elem, *file, *p;
  267. char line[64], *progarg[sizeof(line)/2+1];
  268. int32_t hdrsz;
  269. uintptr_t entry, stack;
  270. file = nil;
  271. elem = nil;
  272. switch(flags){
  273. case EXTC:
  274. case EXXC:
  275. break;
  276. case EXAC:
  277. up->ac = getac(up, -1);
  278. break;
  279. default:
  280. error("unknown execac flag");
  281. }
  282. if(waserror()){
  283. DBG("execac: failing: %s\n", up->errstr);
  284. free(file);
  285. free(elem);
  286. if(flags == EXAC && up->ac != nil)
  287. up->ac->proc = nil;
  288. up->ac = nil;
  289. nexterror();
  290. }
  291. /*
  292. * Open the file, remembering the final element and the full name.
  293. */
  294. argc = 0;
  295. file = validnamedup(ufile, 1);
  296. DBG("execac: up %#p file %s\n", up, file);
  297. if(up->trace)
  298. proctracepid(up);
  299. ichan = namec(file, Aopen, OEXEC, 0);
  300. if(waserror()){
  301. iprint("ERROR ON OPEN\n");
  302. cclose(ichan);
  303. nexterror();
  304. }
  305. kstrdup(&elem, up->genbuf);
  306. /*
  307. * Read the header.
  308. * If it's a #!, fill in progarg[] with info then read a new header
  309. * from the file indicated by the #!.
  310. * The #! line must be less than sizeof(Exec) in size,
  311. * including the terminating \n.
  312. */
  313. hdrsz = ichan->dev->read(ichan, line, sizeof line, 0);
  314. if(hdrsz < 2)
  315. error(Ebadexec);
  316. if(line[0] == '#' && line[1] == '!'){
  317. p = memchr(line, '\n', MIN(sizeof line, hdrsz));
  318. if(p == nil)
  319. error(Ebadexec);
  320. *p = '\0';
  321. argc = tokenize(line+2, progarg, nelem(progarg));
  322. if(argc == 0)
  323. error(Ebadexec);
  324. /* The original file becomes an extra arg after #! line */
  325. progarg[argc++] = file;
  326. /*
  327. * Take the #! $0 as a file to open, and replace
  328. * $0 with the original path's name.
  329. */
  330. p = progarg[0];
  331. progarg[0] = elem;
  332. chan = nil; /* in case namec errors out */
  333. USED(chan);
  334. chan = namec(p, Aopen, OEXEC, 0);
  335. }else{
  336. chan = ichan;
  337. incref(&ichan->r);
  338. }
  339. /* chan is the chan to use, initial or not. ichan is irrelevant now */
  340. cclose(ichan);
  341. poperror();
  342. /*
  343. * #! has had its chance, now we need a real binary.
  344. */
  345. nldseg = elf64ldseg(chan, &entry, &ldseg, cputype, BIGPGSZ);
  346. if(nldseg == 0){
  347. print("execac: elf64ldseg returned 0 segs!\n");
  348. error(Ebadexec);
  349. }
  350. /* TODO(aki): not sure I see the point
  351. if(up->ac != nil && up->ac != machp())
  352. up->color = corecolor(up->ac->machno);
  353. else
  354. up->color = corecolor(machp()->machno);
  355. */
  356. /*
  357. * The new stack is temporarily mapped elsewhere.
  358. * The stack contains, in descending address order:
  359. * a structure containing housekeeping and profiling data (Tos);
  360. * argument strings;
  361. * array of vectors to the argument strings with a terminating
  362. * nil (argv).
  363. * When the exec is committed, this temporary stack is relocated
  364. * to become the actual stack segment.
  365. * The architecture-dependent code which jumps to the new image
  366. * will also push a count of the argument array onto the stack (argc).
  367. */
  368. qlock(&up->seglock);
  369. int sno = -1;
  370. if(waserror()){
  371. if(sno != -1 && up->seg[sno] != nil){
  372. putseg(up->seg[sno]);
  373. up->seg[sno] = nil;
  374. }
  375. qunlock(&up->seglock);
  376. nexterror();
  377. }
  378. for(i = 0; i < NSEG; i++)
  379. if(up->seg[i] == nil)
  380. break;
  381. if(i == NSEG)
  382. error("exeac: no free segment slots");
  383. sno = i;
  384. up->seg[sno] = newseg(SG_STACK|SG_READ|SG_WRITE, TSTKTOP-USTKSIZE, USTKSIZE/BIGPGSZ);
  385. up->seg[sno]->color = up->color;
  386. /*
  387. * Stack is a pointer into the temporary stack
  388. * segment, and will move as items are pushed.
  389. */
  390. stack = TSTKTOP-sizeof(Tos);
  391. /*
  392. * First, the top-of-stack structure.
  393. */
  394. tos = (Tos*)stack;
  395. tos->cyclefreq = sys->cyclefreq;
  396. cycles((uint64_t*)&tos->pcycles);
  397. tos->pcycles = -tos->pcycles;
  398. tos->kcycles = tos->pcycles;
  399. tos->clock = 0;
  400. /*
  401. * Next push any arguments found from a #! header.
  402. */
  403. for(i = 0; i < argc; i++){
  404. n = strlen(progarg[i])+1;
  405. stack -= n;
  406. memmove(UINT2PTR(stack), progarg[i], n);
  407. }
  408. /*
  409. * Copy the strings pointed to by the syscall argument argv into
  410. * the temporary stack segment, being careful to check
  411. * the strings argv points to are valid.
  412. */
  413. for(i = 0;; i++, argv++){
  414. a = *(char**)validaddr(argv, sizeof(char**), 0);
  415. if(a == nil)
  416. break;
  417. a = validaddr(a, 1, 0);
  418. n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1;
  419. /*
  420. * This futzing is so argv[0] gets validated even
  421. * though it will be thrown away if this is a shell
  422. * script.
  423. */
  424. if(argc > 0 && i == 0)
  425. continue;
  426. /*
  427. * Before copying the string into the temporary stack,
  428. * which might involve a demand-page, check the string
  429. * will not overflow the bottom of the stack.
  430. */
  431. stack -= n;
  432. if(stack < TSTKTOP-USTKSIZE)
  433. error(Enovmem);
  434. p = UINT2PTR(stack);
  435. memmove(p, a, n);
  436. p[n-1] = 0;
  437. argc++;
  438. }
  439. if(argc < 1)
  440. error(Ebadexec);
  441. /*
  442. * Before pushing the argument pointers onto the temporary stack,
  443. * which might involve a demand-page, check there is room for the
  444. * terminating nil pointer, plus pointers, plus some slop for however
  445. * argc might be passed on the stack by sysexecregs (give a page
  446. * of slop, it is an overestimate, but why not).
  447. * Sysexecstack does any architecture-dependent stack alignment.
  448. * Keep a copy of the start of the argument strings before alignment
  449. * so up->args can be created later.
  450. * Although the argument vectors are being pushed onto the stack in
  451. * the temporary segment, the values must be adjusted to reflect
  452. * the segment address after it replaces the current SSEG.
  453. */
  454. a = p = UINT2PTR(stack);
  455. stack = sysexecstack(stack, argc);
  456. if(stack-(argc+2)*sizeof(char**)-BIGPGSZ < TSTKTOP-USTKSIZE) {
  457. //iprint("stck too small?\n");
  458. error(Ebadexec);
  459. }
  460. argv = (char**)stack;
  461. *--argv = nil;
  462. for(i = 0; i < argc; i++){
  463. *--argv = p + (USTKTOP-TSTKTOP);
  464. p += strlen(p) + 1;
  465. }
  466. *--argv = (void *)(uintptr_t) argc;
  467. /*
  468. * Make a good faith copy of the args in up->args using the strings
  469. * in the temporary stack segment. The length must be > 0 as it
  470. * includes the \0 on the last argument and argc was checked earlier
  471. * to be > 0. After the memmove, compensate for any UTF character
  472. * boundary before placing the terminating \0.
  473. */
  474. n = p - a;
  475. if(n <= 0)
  476. error(Egreg);
  477. if(n > 128)
  478. n = 128;
  479. p = smalloc(n);
  480. if(waserror()){
  481. free(p);
  482. nexterror();
  483. }
  484. memmove(p, a, n);
  485. while(n > 0 && (p[n-1] & 0xc0) == 0x80)
  486. n--;
  487. p[n-1] = '\0';
  488. /*
  489. * All the argument processing is now done, ready to commit.
  490. */
  491. free(up->text);
  492. up->text = elem;
  493. elem = nil;
  494. free(up->args);
  495. up->args = p;
  496. up->nargs = n;
  497. poperror(); /* p (up->args) */
  498. /*
  499. * Close on exec
  500. */
  501. fg = up->fgrp;
  502. for(i=0; i<=fg->maxfd; i++)
  503. fdclose(i, CCEXEC);
  504. /*
  505. * Free old memory, except for the temp stack (obviously)
  506. */
  507. s = up->seg[sno];
  508. for(i = 0; i < NSEG; i++) {
  509. if(up->seg[i] != s)
  510. putseg(up->seg[i]);
  511. up->seg[i] = nil;
  512. }
  513. /* put the stack in first */
  514. sno = 0;
  515. up->seg[sno++] = s;
  516. s->base = USTKTOP-USTKSIZE;
  517. s->top = USTKTOP;
  518. relocateseg(s, USTKTOP-TSTKTOP);
  519. img = nil;
  520. uintptr_t datalim;
  521. datalim = 0;
  522. for(i = 0; i < nldseg; i++){
  523. if(img == nil){
  524. img = attachimage(ldseg[i].type, chan, up->color,
  525. ldseg[i].pg0vaddr,
  526. (ldseg[i].pg0off+ldseg[i].memsz+BIGPGSZ-1)/BIGPGSZ
  527. );
  528. s = img->s;
  529. s->flushme = 1;
  530. if(img->color != up->color)
  531. up->color = img->color;
  532. unlock(&img->r.l);
  533. } else {
  534. s = newseg(ldseg[i].type, ldseg[i].pg0vaddr, (ldseg[i].pg0off+ldseg[i].memsz+BIGPGSZ-1)/BIGPGSZ);
  535. s->color = up->color;
  536. incref(&img->r);
  537. s->image = img;
  538. }
  539. s->ldseg = ldseg[i];
  540. up->seg[sno++] = s;
  541. if(datalim < ldseg[i].pg0vaddr+ldseg[i].memsz)
  542. datalim = ldseg[i].pg0vaddr+ldseg[i].memsz;
  543. }
  544. /* BSS. Zero fill on demand for TS */
  545. s = newseg(SG_BSS|SG_READ|SG_WRITE, (datalim + BIGPGSZ-1) & ~(BIGPGSZ-1), 0);
  546. up->seg[sno++] = s;
  547. s->color= up->color;
  548. for(i = 0; i < sno; i++){
  549. s = up->seg[i];
  550. DBG(
  551. "execac %d %s(%c%c%c) %p:%p va %p off %p fsz %d msz %d\n",
  552. up->pid, segtypes[s->type & SG_TYPE],
  553. (s->type & SG_READ) != 0 ? 'r' : '-',
  554. (s->type & SG_WRITE) != 0 ? 'w' : '-',
  555. (s->type & SG_EXEC) != 0 ? 'x' : '-',
  556. s->base, s->top,
  557. s->ldseg.pg0vaddr+s->ldseg.pg0off,
  558. s->ldseg.pg0fileoff+s->ldseg.pg0off,
  559. s->ldseg.filesz,
  560. s->ldseg.memsz
  561. );
  562. }
  563. /* the color of the stack was decided when we created it before,
  564. * it may have nothing to do with the color of other segments.
  565. */
  566. qunlock(&up->seglock);
  567. poperror(); /* seglock */
  568. /*
  569. * '/' processes are higher priority
  570. * aki: why bother?
  571. *
  572. * if(chan->dev->dc == L'/')
  573. * up->basepri = PriRoot;
  574. */
  575. up->priority = up->basepri;
  576. poperror(); /* chan, elem, file */
  577. cclose(chan);
  578. free(file);
  579. /*
  580. * At this point, the mmu contains info about the old address
  581. * space and needs to be flushed
  582. */
  583. mmuflush();
  584. if(up->prepagemem || flags == EXAC)
  585. nixprepage(-1);
  586. qlock(&up->debug);
  587. up->nnote = 0;
  588. up->notify = 0;
  589. up->notified = 0;
  590. up->privatemem = 0;
  591. sysprocsetup(up);
  592. qunlock(&up->debug);
  593. if(up->hang)
  594. up->procctl = Proc_stopme;
  595. /* we need to compte the value of &argv in user mode and then push that. */
  596. ar0->v = sysexecregs(entry, TSTKTOP - PTR2UINT(argv), ((void *)tos) + (USTKTOP-TSTKTOP)/sizeof(void *));
  597. if(flags == EXAC){
  598. up->procctl = Proc_toac;
  599. up->prepagemem = 1;
  600. }
  601. }
  602. void
  603. sysexecac(Ar0* ar0, ...)
  604. {
  605. int flags;
  606. char *file, **argv;
  607. va_list list;
  608. va_start(list, ar0);
  609. /*
  610. * void* execac(int flags, char* name, char* argv[]);
  611. */
  612. flags = va_arg(list, unsigned int);
  613. file = va_arg(list, char*);
  614. file = validaddr(file, 1, 0);
  615. argv = va_arg(list, char**);
  616. va_end(list);
  617. evenaddr(PTR2UINT(argv));
  618. execac(ar0, flags, file, argv);
  619. }
  620. void
  621. sysexec(Ar0* ar0, ...)
  622. {
  623. char *file, **argv;
  624. va_list list;
  625. va_start(list, ar0);
  626. /*
  627. * void* exec(char* name, char* argv[]);
  628. */
  629. file = va_arg(list, char*);
  630. file = validaddr(file, 1, 0);
  631. argv = va_arg(list, char**);
  632. va_end(list);
  633. evenaddr(PTR2UINT(argv));
  634. execac(ar0, EXTC, file, argv);
  635. }
  636. void
  637. sysr1(Ar0* ar, ...)
  638. {
  639. print("sysr1() called. recompile your binary\n");
  640. }
  641. void
  642. sysnixsyscall(Ar0* ar, ...)
  643. {
  644. print("nixsyscall() called. recompile your binary\n");
  645. }
  /*
   * return0 - condition function that is never satisfied.
   * The argument is ignored and the result is always 0; syssleep() passes
   * this function to tsleep().
   */
  int
  return0(void* v)
  {
      (void)v;        /* deliberately unused */
      return 0;
  }
  651. void
  652. syssleep(Ar0* ar0, ...)
  653. {
  654. Proc *up = externup();
  655. int64_t ms;
  656. va_list list;
  657. va_start(list, ar0);
  658. /*
  659. * int sleep(long millisecs);
  660. */
  661. ms = va_arg(list, int64_t);
  662. va_end(list);
  663. ar0->i = 0;
  664. if(ms <= 0) {
  665. if (up->edf && (up->edf->flags & Admitted))
  666. edfyield();
  667. else
  668. yield();
  669. return;
  670. }
  671. if(ms < TK2MS(1))
  672. ms = TK2MS(1);
  673. tsleep(&up->sleep, return0, 0, ms);
  674. }
  675. void
  676. sysalarm(Ar0* ar0, ...)
  677. {
  678. unsigned long ms;
  679. va_list list;
  680. va_start(list, ar0);
  681. /*
  682. * long alarm(unsigned long millisecs);
  683. * Odd argument type...
  684. */
  685. ms = va_arg(list, unsigned long);
  686. va_end(list);
  687. ar0->l = procalarm(ms);
  688. }
  689. void
  690. sysexits(Ar0* ar0, ...)
  691. {
  692. Proc *up = externup();
  693. char *status;
  694. char *inval = "invalid exit string";
  695. char buf[ERRMAX];
  696. va_list list;
  697. va_start(list, ar0);
  698. /*
  699. * void exits(char *msg);
  700. */
  701. status = va_arg(list, char*);
  702. va_end(list);
  703. if(status){
  704. if(waserror())
  705. status = inval;
  706. else{
  707. status = validaddr(status, 1, 0);
  708. if(vmemchr(status, 0, ERRMAX) == 0){
  709. memmove(buf, status, ERRMAX);
  710. buf[ERRMAX-1] = 0;
  711. status = buf;
  712. }
  713. poperror();
  714. }
  715. }
  716. pexit(status, 1);
  717. }
  718. void
  719. sys_wait(Ar0* ar0, ...)
  720. {
  721. int pid;
  722. Waitmsg w;
  723. OWaitmsg *ow;
  724. va_list list;
  725. va_start(list, ar0);
  726. /*
  727. * int wait(Waitmsg* w);
  728. *
  729. * Deprecated; backwards compatibility only.
  730. */
  731. ow = va_arg(list, OWaitmsg*);
  732. va_end(list);
  733. if(ow == nil){
  734. ar0->i = pwait(nil);
  735. return;
  736. }
  737. ow = validaddr(ow, sizeof(OWaitmsg), 1);
  738. evenaddr(PTR2UINT(ow));
  739. pid = pwait(&w);
  740. if(pid >= 0){
  741. readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
  742. readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
  743. readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
  744. readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
  745. strncpy(ow->msg, w.msg, sizeof(ow->msg));
  746. ow->msg[sizeof(ow->msg)-1] = '\0';
  747. }
  748. ar0->i = pid;
  749. }
  750. void
  751. sysawait(Ar0* ar0, ...)
  752. {
  753. int i;
  754. int pid;
  755. Waitmsg w;
  756. usize n;
  757. char *p;
  758. va_list list;
  759. va_start(list, ar0);
  760. /*
  761. * int await(char* s, int n);
  762. * should really be
  763. * usize await(char* s, usize n);
  764. */
  765. p = va_arg(list, char*);
  766. n = va_arg(list, int32_t);
  767. va_end(list);
  768. p = validaddr(p, n, 1);
  769. pid = pwait(&w);
  770. if(pid < 0){
  771. ar0->i = -1;
  772. return;
  773. }
  774. i = snprint(p, n, "%d %lu %lu %lu %q",
  775. w.pid,
  776. w.time[TUser], w.time[TSys], w.time[TReal],
  777. w.msg);
  778. ar0->i = i;
  779. }
  780. void
  781. werrstr(char *fmt, ...)
  782. {
  783. Proc *up = externup();
  784. va_list va;
  785. if(up == nil)
  786. return;
  787. va_start(va, fmt);
  788. vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
  789. va_end(va);
  790. }
  791. static void
  792. generrstr(char *buf, int32_t n)
  793. {
  794. Proc *up = externup();
  795. char *p, tmp[ERRMAX];
  796. if(n <= 0)
  797. error(Ebadarg);
  798. p = validaddr(buf, n, 1);
  799. if(n > sizeof tmp)
  800. n = sizeof tmp;
  801. memmove(tmp, p, n);
  802. /* make sure it's NUL-terminated */
  803. tmp[n-1] = '\0';
  804. memmove(p, up->syserrstr, n);
  805. p[n-1] = '\0';
  806. memmove(up->syserrstr, tmp, n);
  807. }
  808. void
  809. syserrstr(Ar0* ar0, ...)
  810. {
  811. char *err;
  812. usize nerr;
  813. va_list list;
  814. va_start(list, ar0);
  815. /*
  816. * int errstr(char* err, uint nerr);
  817. * should really be
  818. * usize errstr(char* err, usize nerr);
  819. * but errstr always returns 0.
  820. */
  821. err = va_arg(list, char*);
  822. nerr = va_arg(list, usize);
  823. va_end(list);
  824. generrstr(err, nerr);
  825. ar0->i = 0;
  826. }
  827. void
  828. sys_errstr(Ar0* ar0, ...)
  829. {
  830. char *p;
  831. va_list list;
  832. va_start(list, ar0);
  833. /*
  834. * int errstr(char* err);
  835. *
  836. * Deprecated; backwards compatibility only.
  837. */
  838. p = va_arg(list, char*);
  839. va_end(list);
  840. generrstr(p, 64);
  841. ar0->i = 0;
  842. }
  843. void
  844. sysnotify(Ar0* ar0, ...)
  845. {
  846. Proc *up = externup();
  847. void (*f)(void*, char*);
  848. va_list list;
  849. va_start(list, ar0);
  850. /*
  851. * int notify(void (*f)(void*, char*));
  852. */
  853. f = (void (*)(void*, char*))va_arg(list, void*);
  854. va_end(list);
  855. if(f != nil)
  856. validaddr(f, sizeof(void (*)(void*, char*)), 0);
  857. up->notify = f;
  858. ar0->i = 0;
  859. }
  860. void
  861. sysnoted(Ar0* ar0, ...)
  862. {
  863. Proc *up = externup();
  864. int v;
  865. va_list list;
  866. va_start(list, ar0);
  867. /*
  868. * int noted(int v);
  869. */
  870. v = va_arg(list, int);
  871. va_end(list);
  872. if(v != NRSTR && !up->notified)
  873. error(Egreg);
  874. ar0->i = 0;
  875. }
  876. void
  877. sysr0(Ar0* ar0, ...)
  878. {
  879. Proc *up = externup();
  880. dumpgpr(up->ureg);
  881. ar0->i = 0;
  882. }
  883. void
  884. sysrendezvous(Ar0* ar0, ...)
  885. {
  886. Proc *up = externup();
  887. Proc *p, **l;
  888. uintptr_t tag, val;
  889. va_list list;
  890. va_start(list, ar0);
  891. /*
  892. * void* rendezvous(void*, void*);
  893. */
  894. tag = PTR2UINT(va_arg(list, void*));
  895. l = &REND(up->rgrp, tag);
  896. up->rendval = ~0;
  897. lock(&up->rgrp->r.l);
  898. for(p = *l; p; p = p->rendhash) {
  899. if(p->rendtag == tag) {
  900. *l = p->rendhash;
  901. val = p->rendval;
  902. p->rendval = PTR2UINT(va_arg(list, void*));
  903. while(p->mach != 0)
  904. ;
  905. ready(p);
  906. unlock(&up->rgrp->r.l);
  907. ar0->v = UINT2PTR(val);
  908. return;
  909. }
  910. l = &p->rendhash;
  911. }
  912. /* Going to sleep here */
  913. up->rendtag = tag;
  914. up->rendval = PTR2UINT(va_arg(list, void*));
  915. va_end(list);
  916. up->rendhash = *l;
  917. *l = up;
  918. up->state = Rendezvous;
  919. if(up->trace)
  920. proctrace(up, SLock, 0);
  921. unlock(&up->rgrp->r.l);
  922. sched();
  923. ar0->v = UINT2PTR(up->rendval);
  924. }
  925. /*
  926. * The implementation of semaphores is complicated by needing
  927. * to avoid rescheduling in syssemrelease, so that it is safe
  928. * to call from real-time processes. This means syssemrelease
  929. * cannot acquire any qlocks, only spin locks.
  930. *
  931. * Semacquire and semrelease must both manipulate the semaphore
  932. * wait list. Lock-free linked lists only exist in theory, not
  933. * in practice, so the wait list is protected by a spin lock.
  934. *
  935. * The semaphore value *addr is stored in user memory, so it
  936. * cannot be read or written while holding spin locks.
  937. *
  938. * Thus, we can access the list only when holding the lock, and
  939. * we can access the semaphore only when not holding the lock.
  940. * This makes things interesting. Note that sleep's condition function
  941. * is called while holding two locks - r and up->rlock - so it cannot
  942. * access the semaphore value either.
  943. *
  944. * An acquirer announces its intention to try for the semaphore
  945. * by putting a Sema structure onto the wait list and then
  946. * setting Sema.waiting. After one last check of semaphore,
  947. * the acquirer sleeps until Sema.waiting==0. A releaser of n
  948. * must wake up n acquirers who have Sema.waiting set. It does
  949. * this by clearing Sema.waiting and then calling wakeup.
  950. *
  951. * There are three interesting races here.
  952. * The first is that in this particular sleep/wakeup usage, a single
  953. * wakeup can rouse a process from two consecutive sleeps!
  954. * The ordering is:
  955. *
  956. * (a) set Sema.waiting = 1
  957. * (a) call sleep
  958. * (b) set Sema.waiting = 0
  959. * (a) check Sema.waiting inside sleep, return w/o sleeping
  960. * (a) try for semaphore, fail
  961. * (a) set Sema.waiting = 1
  962. * (a) call sleep
  963. * (b) call wakeup(a)
  964. * (a) wake up again
  965. *
  966. * This is okay - semacquire will just go around the loop
  967. * again. It does mean that at the top of the for(;;) loop in
  968. * semacquire, phore.waiting might already be set to 1.
  969. *
  970. * The second is that a releaser might wake an acquirer who is
  971. * interrupted before he can acquire the lock. Since
  972. * release(n) issues only n wakeup calls -- only n can be used
  973. * anyway -- if the interrupted process is not going to use his
  974. * wakeup call he must pass it on to another acquirer.
  975. *
  976. * The third race is similar to the second but more subtle. An
  977. * acquirer sets waiting=1 and then does a final canacquire()
  978. * before going to sleep. The opposite order would result in
  979. * missing wakeups that happen between canacquire and
  980. * waiting=1. (In fact, the whole point of Sema.waiting is to
  981. * avoid missing wakeups between canacquire() and sleep().) But
  982. * there can be spurious wakeups between a successful
  983. * canacquire() and the following semdequeue(). This wakeup is
  984. * not useful to the acquirer, since he has already acquired
  985. * the semaphore. Like in the previous case, though, the
  986. * acquirer must pass the wakeup call along.
  987. *
  988. * This is all rather subtle. The code below has been verified
  989. * with the spin model /sys/src/9/port/semaphore.p. The
  990. * original code anticipated the second race but not the first
  991. * or third, which were caught only with spin. The first race
  992. * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
  993. * It was lucky that my abstract model of sleep/wakeup still managed
  994. * to preserve that behavior.
  995. *
  996. * I remain slightly concerned about memory coherence
  997. * outside of locks. The spin model does not take
  998. * queued processor writes into account so we have to
  999. * think hard. The only variables accessed outside locks
  1000. * are the semaphore value itself and the boolean flag
  1001. * Sema.waiting. The value is only accessed with CAS,
  1002. * whose job description includes doing the right thing as
  1003. * far as memory coherence across processors. That leaves
  1004. * Sema.waiting. To handle it, we call coherence() before each
  1005. * read and after each write. - rsc
  1006. */
  1007. /* Add semaphore p with addr a to list in seg. */
  1008. static void
  1009. semqueue(Segment* s, int* addr, Sema* p)
  1010. {
  1011. memset(p, 0, sizeof *p);
  1012. p->addr = addr;
  1013. lock(&s->sema.rend.l); /* uses s->sema.Rendez.Lock, but no one else is */
  1014. p->next = &s->sema;
  1015. p->prev = s->sema.prev;
  1016. p->next->prev = p;
  1017. p->prev->next = p;
  1018. unlock(&s->sema.rend.l);
  1019. }
  1020. /* Remove semaphore p from list in seg. */
  1021. static void
  1022. semdequeue(Segment* s, Sema* p)
  1023. {
  1024. lock(&s->sema.rend.l);
  1025. p->next->prev = p->prev;
  1026. p->prev->next = p->next;
  1027. unlock(&s->sema.rend.l);
  1028. }
  1029. /* Wake up n waiters with addr on list in seg. */
  1030. static void
  1031. semwakeup(Segment* s, int* addr, int n)
  1032. {
  1033. Sema *p;
  1034. lock(&s->sema.rend.l);
  1035. for(p = s->sema.next; p != &s->sema && n > 0; p = p->next){
  1036. if(p->addr == addr && p->waiting){
  1037. p->waiting = 0;
  1038. coherence();
  1039. wakeup(&p->rend);
  1040. n--;
  1041. }
  1042. }
  1043. unlock(&s->sema.rend.l);
  1044. }
  1045. /* Add delta to semaphore and wake up waiters as appropriate. */
  1046. static int
  1047. semrelease(Segment* s, int* addr, int delta)
  1048. {
  1049. int value;
  1050. do
  1051. value = *addr;
  1052. while(!CASW(addr, value, value+delta));
  1053. semwakeup(s, addr, delta);
  1054. return value+delta;
  1055. }
  /*
   * Non-blocking attempt to take one unit from the semaphore at addr.
   * While the observed value is positive, try to swap in value-1;
   * returns 1 as soon as a swap succeeds, 0 once the value is seen
   * to be zero or negative.
   */
  static int
  canacquire(int* addr)
  {
      int seen;

      for(;;){
          seen = *addr;
          if(seen <= 0)
              return 0;
          if(CASW(addr, seen, seen-1))
              return 1;
          /* lost the race with another updater; look again */
      }
  }
  1067. /* Should we wake up? */
  1068. static int
  1069. semawoke(void* p)
  1070. {
  1071. coherence();
  1072. return !((Sema*)p)->waiting;
  1073. }
  1074. /* Acquire semaphore (subtract 1). */
  1075. static int
  1076. semacquire(Segment* s, int* addr, int block)
  1077. {
  1078. Proc *up = externup();
  1079. int acquired;
  1080. Sema phore;
  1081. if(canacquire(addr))
  1082. return 1;
  1083. if(!block)
  1084. return 0;
  1085. acquired = 0;
  1086. semqueue(s, addr, &phore);
  1087. for(;;){
  1088. phore.waiting = 1;
  1089. coherence();
  1090. if(canacquire(addr)){
  1091. acquired = 1;
  1092. break;
  1093. }
  1094. if(waserror())
  1095. break;
  1096. sleep(&phore.rend, semawoke, &phore);
  1097. poperror();
  1098. }
  1099. semdequeue(s, &phore);
  1100. coherence(); /* not strictly necessary due to lock in semdequeue */
  1101. if(!phore.waiting)
  1102. semwakeup(s, addr, 1);
  1103. if(!acquired)
  1104. nexterror();
  1105. return 1;
  1106. }
  1107. /* Acquire semaphore or time-out */
  1108. static int
  1109. tsemacquire(Segment* s, int* addr, int32_t ms)
  1110. {
  1111. Proc *up = externup();
  1112. int acquired;
  1113. uint32_t t;
  1114. Sema phore;
  1115. if(canacquire(addr))
  1116. return 1;
  1117. if(ms == 0)
  1118. return 0;
  1119. acquired = 0;
  1120. semqueue(s, addr, &phore);
  1121. for(;;){
  1122. phore.waiting = 1;
  1123. coherence();
  1124. if(canacquire(addr)){
  1125. acquired = 1;
  1126. break;
  1127. }
  1128. if(waserror())
  1129. break;
  1130. t = sys->ticks;
  1131. tsleep(&phore.rend, semawoke, &phore, ms);
  1132. ms -= TK2MS(sys->ticks-t);
  1133. poperror();
  1134. if(ms <= 0)
  1135. break;
  1136. }
  1137. semdequeue(s, &phore);
  1138. coherence(); /* not strictly necessary due to lock in semdequeue */
  1139. if(!phore.waiting)
  1140. semwakeup(s, addr, 1);
  1141. if(ms <= 0)
  1142. return 0;
  1143. if(!acquired)
  1144. nexterror();
  1145. return 1;
  1146. }
  1147. void
  1148. syssemacquire(Ar0* ar0, ...)
  1149. {
  1150. Proc *up = externup();
  1151. Segment *s;
  1152. int *addr, block;
  1153. va_list list;
  1154. va_start(list, ar0);
  1155. /*
  1156. * int semacquire(long* addr, int block);
  1157. * should be (and will be implemented below as) perhaps
  1158. * int semacquire(int* addr, int block);
  1159. */
  1160. addr = va_arg(list, int*);
  1161. addr = validaddr(addr, sizeof(int), 1);
  1162. evenaddr(PTR2UINT(addr));
  1163. block = va_arg(list, int);
  1164. va_end(list);
  1165. if((s = seg(up, PTR2UINT(addr), 0)) == nil)
  1166. error(Ebadarg);
  1167. if(*addr < 0)
  1168. error(Ebadarg);
  1169. ar0->i = semacquire(s, addr, block);
  1170. }
  1171. void
  1172. systsemacquire(Ar0* ar0, ...)
  1173. {
  1174. Proc *up = externup();
  1175. Segment *s;
  1176. int *addr, ms;
  1177. va_list list;
  1178. va_start(list, ar0);
  1179. /*
  1180. * int tsemacquire(long* addr, uint32_t ms);
  1181. * should be (and will be implemented below as) perhaps
  1182. * int tsemacquire(int* addr, uint32_t ms);
  1183. */
  1184. addr = va_arg(list, int*);
  1185. addr = validaddr(addr, sizeof(int), 1);
  1186. evenaddr(PTR2UINT(addr));
  1187. ms = va_arg(list, uint32_t);
  1188. va_end(list);
  1189. if((s = seg(up, PTR2UINT(addr), 0)) == nil)
  1190. error(Ebadarg);
  1191. if(*addr < 0)
  1192. error(Ebadarg);
  1193. ar0->i = tsemacquire(s, addr, ms);
  1194. }
  1195. void
  1196. syssemrelease(Ar0* ar0, ...)
  1197. {
  1198. Proc *up = externup();
  1199. Segment *s;
  1200. int *addr, delta;
  1201. va_list list;
  1202. va_start(list, ar0);
  1203. /*
  1204. * long semrelease(long* addr, long count);
  1205. * should be (and will be implemented below as) perhaps
  1206. * int semrelease(int* addr, int count);
  1207. */
  1208. addr = va_arg(list, int*);
  1209. addr = validaddr(addr, sizeof(int), 1);
  1210. evenaddr(PTR2UINT(addr));
  1211. delta = va_arg(list, int);
  1212. va_end(list);
  1213. if((s = seg(up, PTR2UINT(addr), 0)) == nil)
  1214. error(Ebadarg);
  1215. if(delta < 0 || *addr < 0)
  1216. error(Ebadarg);
  1217. ar0->i = semrelease(s, addr, delta);
  1218. }