proc.c

#include	<u.h>
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"
#include	"edf.h"
#include	<trace.h>

int	coopsched;
int	schedgain = 30;	/* units in seconds */
int	nrdy;
Ref	noteidalloc;

void	updatecpu(Proc*);
int	reprioritize(Proc*);

ulong	delayedscheds;	/* statistics */
long	skipscheds;
long	preempts;
ulong	load;

static Ref	pidalloc;

static struct Procalloc
{
	Lock;
	Proc*	ht[128];
	Proc*	arena;
	Proc*	free;
} procalloc;

enum
{
	Q=10,
	DQ=4,
	Scaling=2,
};

Schedq	runq[Nrq];
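/*
 * one bit per priority: bit i is set while runq[i] is non-empty, so
 * anyready() and anyhigher() can test for work without taking the runq lock.
 */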
ulong	runvec;

char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
};

static void pidhash(Proc*);
static void pidunhash(Proc*);
static void rebalance(void);

/*
 * Always splhi()'ed.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	setlabel(&m->sched);
	if(up) {
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		m->proc = 0;
		switch(up->state) {
		case Running:
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			edfstop(up);
			if (up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	palloc
			 */
			mmurelease(up);

			up->qnext = procalloc.free;
			procalloc.free = up;

			unlock(&palloc);
			unlock(&procalloc);
			break;
		}
		up->mach = nil;
		updatecpu(up);
		up = nil;
	}
	sched();
}

/*
 * If changing this routine, look also at sleep().  It
 * contains a copy of the guts of sched().
 */
void
sched(void)
{
	Proc *p;

	if(m->ilockdepth)
		panic("ilockdepth %d, last lock 0x%p at 0x%lux, sched called from 0x%lux",
			m->ilockdepth, up?up->lastilock:nil,
			(up && up->lastilock)?up->lastilock->pc:0,
			getcallerpc(&p+2));
	if(up){
		if(up->nlocks.ref && up->state != Moribund && up->delaysched < 20){
			up->delaysched++;
			delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)){
			procrestore(up);
			spllo();
			return;
		}
		gotolabel(&m->sched);
	}
	p = runproc();
	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	if(p != m->readied)
		m->schedticks = m->ticks + HZ/10;
	m->readied = 0;
	up = p;
	up->state = Running;
	up->mach = MACHP(m->machno);
	m->proc = up;
	mmuswitch(up);
	gotolabel(&up->sched);
}

int
anyready(void)
{
	return runvec;
}
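/*
 * ((1<<(up->priority+1))-1) is a mask of bits 0..up->priority;
 * clearing those from runvec leaves only the queues of strictly
 * higher priority than the current process.
 */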
int
anyhigher(void)
{
	return runvec & ~((1<<(up->priority+1))-1);
}

/*
 * here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	/* once a second, rebalance will reprioritize ready procs */
	if(m->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	if(anyhigher()
	|| (!up->fixedpri && m->ticks > m->schedticks && anyready())){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->delaysched++;
	}
}

/*
 * here at the end of non-clock interrupts to see if we should preempt the
 * current process.  Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}

/*
 * Update the cpu time average for this particular process,
 * which is about to change from up -> not up or vice versa.
 * p->lastupdate is the last time an updatecpu happened.
 *
 * The cpu time average is a decaying average that lasts
 * about D clock ticks.  D is chosen to be approximately
 * the cpu time of a cpu-intensive "quick job".  A job has to run
 * for approximately D clock ticks before we home in on its
 * actual cpu usage.  Thus if you manage to get in and get out
 * quickly, you won't be penalized during your burst.  Once you
 * start using your share of the cpu for more than about D
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 * below all the other quick jobs.  Interactive tasks, because
 * they basically always use less than their fair share of cpu,
 * will be rewarded.
 *
 * If the process has not been running, then we want to
 * apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 * n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 * but D is big enough that this is approximately
 *
 *	cpu = cpu * (D-n)/D
 *
 * so we use that instead.
 *
 * If the process has been running, we apply the filter to
 * 1 - cpu, yielding a similar equation.  Note that cpu is
 * stored in fixed point (* 1000).
 *
 * Updatecpu must be called before changing up, in order
 * to maintain accurate cpu usage statistics.  It can be called
 * at any time to bring the stats for a given proc up-to-date.
 */
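/*
 * For example, with schedgain = 30, Scaling = 2, and (machine-dependent)
 * HZ = 100, D = 30*100*2 = 6000 scaled ticks, i.e. 30 seconds of wall clock.
 * A proc with cpu = 1000 that then sleeps for one second (n = 200) decays to
 * 1000*5800/6000 = 966; a fresh proc (cpu = 0) that runs flat out for one
 * second climbs to 1000 - 1000*5800/6000 = 34.
 */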
void
updatecpu(Proc *p)
{
	int n, t, ocpu;
	int D = schedgain*HZ*Scaling;

	if(p->edf)
		return;

	t = MACHP(0)->ticks*Scaling + Scaling/2;
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		p->cpu = (ocpu*(D-n))/D;
	else{
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}
//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
}

/*
 * On average, p has used p->cpu of a cpu recently.
 * Its fair share is conf.nmach/m->load of a cpu.  If it has been getting
 * too much, penalize it.  If it has been getting not enough, reward it.
 * I don't think you can get much more than your fair share that
 * often, so most of the queues are for using less.  Having a priority
 * of 3 means you're just right.  Having a higher priority (up to p->basepri)
 * means you're not using as much as you could.
 */
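/*
 * For example, with one cpu (conf.nmach = 1) and load = 2000 (roughly two
 * runnable procs on average), fairshare = 500.  A proc averaging p->cpu = 250
 * gets ratio (500+125)/250 = 2, while one averaging p->cpu = 100 gets
 * (500+50)/100 = 5; values are clipped to p->basepri.
 */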
int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = MACHP(0)->load;
	if(load == 0)
		return p->basepri;

	/*
	 * fairshare = 1.000 * conf.nmach * 1.000/load,
	 * except the decimal point is moved three places
	 * on both load and fairshare.
	 */
	fairshare = (conf.nmach*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");
//iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}

/*
 * add a process to a scheduling queue
 */
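/*
 * Called splhi, from ready() and rebalance().  Appends p to the tail of
 * runq[pri] and marks that queue as occupied in runvec.
 */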
void
queueproc(Schedq *rq, Proc *p)
{
	int pri;

	pri = rq - runq;
	lock(runq);
	p->priority = pri;
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	nrdy++;
	runvec |= 1<<pri;
	unlock(runq);
}

/*
 * try to remove a process from a scheduling queue (called splhi)
 */
Proc*
dequeueproc(Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	if(!canlock(runq))
		return nil;

	/*
	 * the queue may have changed before we locked runq,
	 * refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 * p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(runq);
		return nil;
	}
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		runvec &= ~(1<<(rq-runq));
	rq->n--;
	nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);

	unlock(runq);
	return p;
}

/*
 * ready(p) picks a new priority for a process and sticks it in the
 * runq for that priority.
 */
void
ready(Proc *p)
{
	int s, pri;
	Schedq *rq;
	void (*pt)(Proc*, int, vlong);

	s = splhi();
	if(edfready(p)){
		splx(s);
		return;
	}

	if(up != p)
		m->readied = p;	/* group scheduling */

	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &runq[pri];
	p->state = Ready;
	queueproc(rq, p);
	pt = proctrace;
	if(pt)
		pt(p, SReady, 0);
	splx(s);
}

/*
 * yield the processor and drop our priority
 */
void
yield(void)
{
	if(anyready()){
		/* pretend we just used 1/2 tick */
		up->lastupdate -= Scaling/2;
		sched();
	}
}

/*
 * recalculate priorities once a second.  We need to do this
 * since priorities will otherwise only be recalculated when
 * the running process blocks.
 */
ulong balancetime;

static void
rebalance(void)
{
	int pri, npri, t, x;
	Schedq *rq;
	Proc *p;

	t = m->ticks;
	if(t - balancetime < HZ)
		return;
	balancetime = t;

	for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		if(p->mp != MACHP(m->machno))
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			x = splhi();
			p = dequeueproc(rq, p);
			if(p)
				queueproc(&runq[npri], p);
			splx(x);
			goto another;
		}
	}
}

/*
 * pick a process to run
 */
Proc*
runproc(void)
{
	Schedq *rq;
	Proc *p;
	ulong start, now;
	int i;
	void (*pt)(Proc*, int, vlong);

	start = perfticks();

	/* cooperative scheduling until the clock ticks */
	if(coopsched && (p=m->readied) && p->mach==0 && p->state==Ready
	&& runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
		skipscheds++;
		rq = &runq[p->priority];
		goto found;
	}

	preempts++;

loop:
	/*
	 * find a process that last ran on this processor (affinity),
	 * or one that hasn't moved in a while (load balancing).  Every
	 * time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 * find the highest priority target process that this
		 * processor can run given affinity constraints.
		 *
		 */
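		/*
		 * on the first pass (i == 0) only procs that last ran here,
		 * or have not run anywhere yet, qualify; on later passes any
		 * unwired proc may be stolen from another mach.
		 */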
		for(rq = &runq[Nrq-1]; rq >= runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				if(p->mp == nil || p->mp == MACHP(m->machno)
				|| (!p->wired && i > 0))
					goto found;
			}
		}

		/* waste time or halt the CPU */
		idlehands();

		/* remember how much time we spent idling */
		now = perfticks();
		m->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	p = dequeueproc(rq, p);
	if(p == nil)
		goto loop;

	p->state = Scheding;
	p->mp = MACHP(m->machno);

	if(edflock(p)){
		edfrun(p, rq == &runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	pt = proctrace;
	if(pt)
		pt(p, SRun, 0);
	return p;
}

int
canpage(Proc *p)
{
	int ok = 0;

	splhi();
	lock(runq);
	/* Only reliable way to see if we are Running */
	if(p->mach == 0) {
		p->newtlb = 1;
		ok = 1;
	}
	unlock(runq);
	spllo();

	return ok;
}
Proc*
newproc(void)
{
	Proc *p;

	lock(&procalloc);
	for(;;) {
		if(p = procalloc.free)
			break;

		unlock(&procalloc);
		resrcwait("no procs");
		lock(&procalloc);
	}
	procalloc.free = p->qnext;
	unlock(&procalloc);

	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->fpstate = FPinit;
	p->kp = 0;
	p->procctl = 0;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks.ref = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pidhash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid==0 || p->noteid==0)
		panic("pidalloc");
	if(p->kstack == 0)
		p->kstack = smalloc(KSTACK);

	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = MACHP(0)->ticks*Scaling;
	p->edf = nil;

	return p;
}

/*
 * wire this proc to a machine
 */
void
procwired(Proc *p, int bm)
{
	Proc *pp;
	int i;
	char nwired[MAXMACH];
	Mach *wm;

	if(bm < 0){
		/* pick a machine to wire to */
		memset(nwired, 0, sizeof(nwired));
		p->wired = 0;
		pp = proctab(0);
		for(i=0; i<conf.nproc; i++, pp++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
		}
		bm = 0;
		for(i=0; i<conf.nmach; i++)
			if(nwired[i] < nwired[bm])
				bm = i;
	} else {
		/* use the virtual machine requested */
		bm = bm % conf.nmach;
	}

	p->wired = MACHP(bm);
	p->mp = p->wired;
}

void
procpriority(Proc *p, int pri, int fixed)
{
	if(pri >= Npriq)
		pri = Npriq - 1;
	else if(pri < 0)
		pri = 0;
	p->basepri = pri;
	p->priority = pri;
	if(fixed){
		p->fixedpri = 1;
	} else {
		p->fixedpri = 0;
	}
}

void
procinit0(void)		/* bad planning - clashes with devproc.c */
{
	Proc *p;
	int i;

	procalloc.free = xalloc(conf.nproc*sizeof(Proc));
	if(procalloc.free == nil){
		xsummary();
		panic("cannot allocate %lud procs (%ludMB)\n", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024));
	}
	procalloc.arena = procalloc.free;

	p = procalloc.free;
	for(i=0; i<conf.nproc-1; i++,p++)
		p->qnext = p+1;
	p->qnext = 0;
}

/*
 * sleep if a condition is not true.  Another process will
 * awaken us after it sets the condition.  When we awaken
 * the condition may no longer be true.
 *
 * we lock both the process and the rendezvous to keep r->p
 * and p->r synchronized.
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	int s;
	void (*pt)(Proc*, int, vlong);

	s = splhi();

	if(up->nlocks.ref)
		print("process %lud sleeps with %lud locks held, last lock 0x%p locked at pc 0x%lux, sleep called from 0x%lux\n",
			up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r));
	lock(r);
	lock(&up->rlock);
	if(r->p){
		print("double sleep called from 0x%lux, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid);
		dumpstack();
	}

	/*
	 * Wakeup only knows there may be something to do by testing
	 * r->p in order to get something to lock on.
	 * Flush that information out to memory in case the sleep is
	 * committed.
	 */
	r->p = up;

	if((*f)(arg) || up->notepending){
		/*
		 * if condition happened or a note is pending
		 * never mind
		 */
		r->p = nil;
		unlock(&up->rlock);
		unlock(r);
	} else {
		/*
		 * now we are committed to
		 * change state and call scheduler
		 */
		pt = proctrace;
		if(pt)
			pt(up, SSleep, 0);
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)) {
			/*
			 * here when the process is awakened
			 */
			procrestore(up);
			spllo();
		} else {
			/*
			 * here to go to sleep (i.e. stop Running)
			 */
			unlock(&up->rlock);
			unlock(r);
			gotolabel(&m->sched);
		}
	}

	if(up->notepending) {
		up->notepending = 0;
		splx(s);
		if(up->procctl == Proc_exitme && up->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}

	splx(s);
}

static int
tfn(void *arg)
{
	return up->trend == nil || up->tfn(arg);
}
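/*
 * timer callback installed by tsleep(); runs at interrupt time and
 * wakes the tsleeping process if it is still waiting.
 */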
void
twakeup(Ureg*, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}

void
tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
{
	if (up->tt){
		print("tsleep: timer active: mode %d, tf 0x%lux\n", up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = MS2NS(ms);
	up->tf = twakeup;
	up->tmode = Trelative;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if (up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}

/*
 * Expects that only one process can call wakeup for any given Rendez.
 * We hold both locks to ensure that r->p and p->r remain consistent.
 * Richard Miller has a better solution that doesn't require both to
 * be held simultaneously, but I'm a paranoid - presotto.
 */
Proc*
wakeup(Rendez *r)
{
	Proc *p;
	int s;

	s = splhi();

	lock(r);
	p = r->p;

	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r)
			panic("wakeup: state");
		r->p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(r);

	splx(s);

	return p;
}

/*
 * if waking a sleeping process, this routine must hold both
 * p->rlock and r->lock.  However, it can't know them in
 * the same order as wakeup causing a possible lock ordering
 * deadlock.  We break the deadlock by giving up the p->rlock
 * lock if we can't get the r->lock and retrying.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	int s, ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);

	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;
	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		s = splhi();
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->p != p)
				panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
			p->r = nil;
			r->p = nil;
			ready(p);
			unlock(r);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(s);
		sched();
	}
	unlock(&p->rlock);
	splx(s);

	if(p->state != Rendezvous)
		return ret;

	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);
	return ret;
}

/*
 * weird thing: keep at most NBROKEN around
 */
#define	NBROKEN 4
struct
{
	QLock;
	int	n;
	Proc	*p[NBROKEN];
}broken;

void
addbroken(Proc *p)
{
	qlock(&broken);
	if(broken.n == NBROKEN) {
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken);

	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}

void
unbreak(Proc *p)
{
	int b;

	qlock(&broken);
	for(b=0; b < broken.n; b++)
		if(broken.p[b] == p) {
			broken.n--;
			memmove(&broken.p[b], &broken.p[b+1],
					sizeof(Proc*)*(NBROKEN-(b+1)));
			ready(p);
			break;
		}
	qunlock(&broken);
}

int
freebroken(void)
{
	int i, n;

	qlock(&broken);
	n = broken.n;
	for(i=0; i<n; i++) {
		ready(broken.p[i]);
		broken.p[i] = 0;
	}
	broken.n = 0;
	qunlock(&broken);
	return n;
}

void
pexit(char *exitstr, int freemem)
{
	Proc *p;
	Segment **s, **es;
	long utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;
	void (*pt)(Proc*, int, vlong);

	up->alarm = 0;
	if (up->tt)
		timerdel(up);
	pt = proctrace;
	if(pt)
		pt(up, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 * if not a kernel process and have a parent,
	 * do some housekeeping.
	 */
	if(up->kp == 0) {
		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 * If my parent is no longer alive, or if there would be more
		 * than 128 zombie child processes for my parent, then don't
		 * leave a wait record behind.  This helps prevent badly
		 * written daemon processes from accumulating lots of wait
		 * records.
		 */
		if(p->pid == up->parentpid && p->state != Broken && p->nwait < 128) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;

			wq->next = p->waitq;
			p->waitq = wq;
			p->nwait++;

			wakeup(&p->waitr);
			unlock(&p->exl);
		}
		else {
			unlock(&p->exl);
			free(wq);
		}
	}

	if(!freemem)
		addbroken(up);

	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	pidunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&palloc);

	edfstop(up);
	up->state = Moribund;
	sched();
	panic("pexit");
}

int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}

ulong
pwait(Waitmsg *w)
{
	ulong cpid;
	Waitq *wq;

	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);
	return cpid;
}

Proc*
proctab(int i)
{
	return &procalloc.arena[i];
}

void
dumpaproc(Proc *p)
{
	ulong bss;
	char *s;

	if(p == 0)
		return;

	bss = 0;
	if(p->seg[BSEG])
		bss = p->seg[BSEG]->top;

	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref, p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
}

void
procdump(void)
{
	int i;
	Proc *p;

	if(up)
		print("up %lud\n", up->pid);
	else
		print("no current process\n");

	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;

		dumpaproc(p);
	}
}
/*
 * wait till all processes have flushed their mmu
 * state about segment s
 */
void
procflushseg(Segment *s)
{
	int i, ns, nm, nwait;
	Proc *p;

	/*
	 * tell all processes with this
	 * segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;
		for(ns = 0; ns < NSEG; ns++)
			if(p->seg[ns] == s){
				p->newtlb = 1;
				for(nm = 0; nm < conf.nmach; nm++){
					if(MACHP(nm)->proc == p){
						MACHP(nm)->flushmmu = 1;
						nwait++;
					}
				}
				break;
			}
	}

	if(nwait == 0)
		return;

	/*
	 * wait for all processors to take a clock interrupt
	 * and flush their mmu's
	 */
	for(nm = 0; nm < conf.nmach; nm++)
		if(MACHP(nm) != m)
			while(MACHP(nm)->flushmmu)
				sched();
}
void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &runq[Nrq-1]; rq >= runq; rq--){
		if(rq->head == 0)
			continue;
		print("rq%ld:", rq-runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %lud(%lud)", p->pid, m->ticks - p->readytime);
		print("\n");
		delay(150);
	}
	print("nrdy %d\n", nrdy);
}

void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;
	p->noswap = 1;

	p->fpsave = up->fpsave;
	p->scallnr = up->scallnr;
	p->s = up->s;
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	if(p->dot)
		incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = 0;
	p->dbgreg = 0;

	procpriority(p, PriKproc, 0);

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = MACHP(0)->ticks;
	ready(p);

	/*
	 * since the bss/data segments are now shareable,
	 * any mmu info about this process is now stale
	 * and has to be discarded.
	 */
	p->newtlb = 1;
	flushmmu();
}

/*
 * called splhi() by notify().  See comment in notify for the
 * reasoning.
 */
void
procctl(Proc *p)
{
	char *state;
	ulong s;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		s = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();
		p->state = Stopped;
		sched();
		p->psstate = state;
		splx(s);
		return;
	}
}

#include "errstr.h"

void
error(char *err)
{
	spllo();

	assert(up->nerrlab < NERR);
	kstrcpy(up->errstr, err, ERRMAX);
	setlabel(&up->errlab[NERR-1]);
	nexterror();
}

void
nexterror(void)
{
	gotolabel(&up->errlab[--up->nerrlab]);
}

void
exhausted(char *resource)
{
	char buf[ERRMAX];

	sprint(buf, "no free %s", resource);
	iprint("%s\n", buf);
	error(buf);
}
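/*
 * free the memory of the largest non-kernel process (by total segment
 * size) and ask it, and any process sharing its bss segment, to exit.
 */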
void
killbig(char *why)
{
	int i;
	Segment *s;
	ulong l, max;
	Proc *p, *ep, *kp;

	max = 0;
	kp = 0;
	ep = procalloc.arena+conf.nproc;
	for(p = procalloc.arena; p < ep; p++) {
		if(p->state == Dead || p->kp)
			continue;
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			kp = p;
			max = l;
		}
	}

	print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
	for(p = procalloc.arena; p < ep; p++) {
		if(p->state == Dead || p->kp)
			continue;
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
			p->procctl = Proc_exitbig;
	}
	kp->procctl = Proc_exitbig;
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		if(s != 0 && canqlock(&s->lk)) {
			mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
			qunlock(&s->lk);
		}
	}
}

/*
 * change ownership to 'new' of all processes owned by 'old'.  Used when
 * eve changes.
 */
void
renameuser(char *old, char *new)
{
	Proc *p, *ep;

	ep = procalloc.arena+conf.nproc;
	for(p = procalloc.arena; p < ep; p++)
		if(p->user!=nil && strcmp(old, p->user)==0)
			kstrdup(&p->user, new);
}

/*
 * time accounting called by clock() splhi'd
 */
void
accounttime(void)
{
	Proc *p;
	ulong n, per;
	static ulong nrun;

	p = m->proc;
	if(p) {
		nrun++;
		p->time[p->insyscall]++;
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - m->perf.last;
	m->perf.last = n;
	per = (m->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		m->perf.period = per;

	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
	m->perf.inidle = 0;

	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
	m->perf.inintr = 0;

	/* only one processor gets to compute system load averages */
	if(m->machno != 0)
		return;

	/*
	 * calculate decaying load average.
	 * if we decay by (n-1)/n then it takes
	 * n clock ticks to go from load L to .36 L once
	 * things quiet down.  it takes about 5 n clock
	 * ticks to go to zero.  so using HZ means this is
	 * approximately the load over the last second,
	 * with a tail lasting about 5 seconds.
	 */
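	/*
	 * ((HZ-1)/HZ)^HZ is roughly 1/e, which is where the .36 above
	 * comes from.  The load is kept in fixed point (* 1000), so a
	 * steady value of 2000 corresponds to about two runnable procs.
	 */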
	n = nrun;
	nrun = 0;
	n = (nrdy+n)*1000;
	m->load = (m->load*(HZ-1)+n)/HZ;
}
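/*
 * pid hash table: procalloc.ht maps a pid to its Proc so that
 * procindex() can find a process without scanning the whole arena.
 */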
static void
pidhash(Proc *p)
{
	int h;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	p->pidhash = procalloc.ht[h];
	procalloc.ht[h] = p;
	unlock(&procalloc);
}

static void
pidunhash(Proc *p)
{
	int h;
	Proc **l;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
		if(*l == p){
			*l = p->pidhash;
			break;
		}
	unlock(&procalloc);
}

int
procindex(ulong pid)
{
	Proc *p;
	int h;
	int s;

	s = -1;
	h = pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
		if(p->pid == pid){
			s = p - procalloc.arena;
			break;
		}
	unlock(&procalloc);
	return s;
}