proc.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834
  1. #include <u.h>
  2. #include "../port/lib.h"
  3. #include "mem.h"
  4. #include "dat.h"
  5. #include "fns.h"
  6. #include "../port/error.h"
  7. #include "../port/edf.h"
  8. #include "errstr.h"
  9. #include <trace.h>
enum
{
	Scaling=2,		/* fixed-point scale applied to ticks in updatecpu() */

	/*
	 * number of schedulers used.
	 * 1 uses just one, which is the behavior of Plan 9.
	 */
	Nsched = 1,
};

Ref noteidalloc;		/* allocator for unique note-group ids (see newproc) */
static Ref pidalloc;		/* allocator for unique pids (see newproc) */

/*
 * Because machines with many cores are NUMA, we try to use
 * a different scheduler per color
 */
Sched run[Nsched];

struct Procalloc procalloc;

/* process-table helpers defined elsewhere (ps*.c) */
extern Proc* psalloc(void);
extern void pshash(Proc*);
extern void psrelease(Proc*);
extern void psunhash(Proc*);

static int reprioritize(Proc*);
static void updatecpu(Proc*);
static void rebalance(void);

int schedsteals = 1;	/* when nonzero, idle cores may steal from other run queues (see steal()) */
int scheddonates = 0;	/* donation policy flag; presumably consulted at exec time — see steal()'s comment */

/* indexed by Proc.state; used in prints such as dequeueproc() */
char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
	"Exotic",
	"Down",
};
  54. void
  55. setmachsched(Mach *mp)
  56. {
  57. int color;
  58. color = corecolor(mp->machno);
  59. if(color < 0){
  60. print("unknown color for cpu%d\n", mp->machno);
  61. color = 0;
  62. }
  63. mp->sch = &run[color%Nsched];
  64. }
  65. Sched*
  66. procsched(Proc *p)
  67. {
  68. Mach *pm;
  69. pm = p->mp;
  70. if(pm == nil)
  71. pm = machp();
  72. if(pm->sch == nil)
  73. setmachsched(pm);
  74. return pm->sch;
  75. }
  76. /*
  77. * bad planning, once more.
  78. */
  79. void
  80. procinit0(void)
  81. {
  82. int i;
  83. for(i = 0; i < Nsched; i++)
  84. run[i].schedgain = 30;
  85. }
  86. /*
  87. * Always splhi()'ed.
  88. */
void
schedinit(void)		/* never returns */
{
	Proc *up;
	Edf *e;

	machp()->inidle = 1;
	if(machp()->sch == nil){
		/* may happen if this cpu was not colored yet */
		print("schedinit: no sch for cpu%d\n", machp()->machno);
		setmachsched(machp());
	}
	ainc(&machp()->sch->nmach);
	/* sched() returns here via gotolabel(&machp()->sched) */
	setlabel(&machp()->sched);
	up = machp()->externup;
	if(up) {
		/* stop EDF accounting for the process we are leaving */
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		coherence();
		machp()->proc = 0;
		switch(up->state) {
		case Running:
			/* it was preempted: put it back on a run queue */
			ready(up);
			break;
		case Moribund:
			/* final teardown of an exiting process */
			up->state = Dead;
			stopac();
			edfstop(up);
			if (up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 * Holding locks from pexit:
			 * procalloc
			 * pga
			 * They are released here, after mmurelease,
			 * in the reverse of the order pexit took them.
			 */
			mmurelease(up);
			unlock(&pga.l);

			psrelease(up);
			unlock(&procalloc.l);
			break;
		}
		up->mach = nil;
		updatecpu(up);
		machp()->externup = nil;
	}
	sched();
}
  135. /*
  136. * Check if the stack has more than 4*KiB free.
  137. * Do not call panic, the stack is gigantic.
  138. */
  139. static void
  140. stackok(void)
  141. {
  142. Proc *up = externup();
  143. char dummy;
  144. if(&dummy < (char*)up->kstack + 4*KiB){
  145. print("tc kernel stack overflow, cpu%d stopped\n", machp()->machno);
  146. DONE();
  147. }
  148. }
  149. /*
  150. * If changing this routine, look also at sleep(). It
  151. * contains a copy of the guts of sched().
  152. */
void
sched(void)
{
	Proc *p;
	Sched *sch;
	Proc *up = externup();

	sch = machp()->sch;
	/* rescheduling with an ilock held would deadlock the machine */
	if(machp()->ilockdepth)
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
			machp()->machno,
			machp()->ilockdepth,
			up? up->lastilock: nil,
			(up && up->lastilock)? 0/*up->lastilock->pc*/: 0,
			getcallerpc());

	if(up){
		/*
		 * Delay the sched until the process gives up the locks
		 * it is holding.  This avoids dumb lock loops.
		 * Don't delay if the process is Moribund.
		 * It called sched to die.
		 * But do sched eventually.  This avoids a missing unlock
		 * from hanging the entire kernel.
		 * But don't reschedule procs holding palloc or procalloc.
		 * Those are far too important to be holding while asleep.
		 *
		 * This test is not exact.  There can still be a few
		 * instructions in the middle of taslock when a process
		 * holds a lock but Lock.p has not yet been initialized.
		 */
		if(up->nlocks)
		if(up->state != Moribund)
		if(up->delaysched < 20
		|| pga.l.p == up
		|| procalloc.l.p == up){
			up->delaysched++;
			sch->delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();

		/* statistics */
		if(up->nqtrap == 0 && up->nqsyscall == 0)
			up->nfullq++;
		machp()->cs++;

		stackok();

		procsave(up);
		mmuflushtlb();
		/* we resume here when this process is next scheduled */
		if(setlabel(&up->sched)){
			procrestore(up);
			spllo();
			return;
		}
		/* jump back into schedinit()'s loop to pick a new process */
		gotolabel(&machp()->sched);
	}

	/* no current process: pick the next one and switch to it */
	machp()->inidle = 1;
	p = runproc();
	machp()->inidle = 0;

	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	/* a fresh (non-cooperative) choice gets a 100ms quantum */
	if(p != machp()->readied)
		machp()->schedticks = machp()->ticks + HZ/10;
	machp()->readied = 0;
	machp()->externup = p;
	up = p;
	up->nqtrap = 0;
	up->nqsyscall = 0;
	up->state = Running;
	up->mach = MACHP(machp()->machno);
	machp()->proc = up;
	mmuswitch(up);

	assert(!up->wired || up->wired == machp());
	gotolabel(&up->sched);
}
  228. int
  229. anyready(void)
  230. {
  231. return machp()->sch->runvec;
  232. }
  233. int
  234. anyhigher(void)
  235. {
  236. Proc *up = externup();
  237. return machp()->sch->runvec & ~((1<<(up->priority+1))-1);
  238. }
  239. /*
  240. * here once per clock tick to see if we should resched
  241. */
void
hzsched(void)
{
	Proc *up = externup();

	/* once a second, rebalance will reprioritize ready procs */
	if(machp()->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	if(anyhigher()
	|| (!up->fixedpri && machp()->ticks > machp()->schedticks && anyready())){
		machp()->readied = nil;	/* avoid cooperative scheduling */
		/* bump delaysched rather than calling sched() from the tick */
		up->delaysched++;
	}
}
  256. /*
  257. * here at the end of non-clock interrupts to see if we should preempt the
  258. * current process. Returns 1 if preempted, 0 otherwise.
  259. */
int
preempted(void)
{
	Proc *up = externup();

	/* only preempt a Running process, once, when someone outranks it */
	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		machp()->readied = nil;	/* avoid cooperative scheduling */
		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}
  277. /*
  278. * Update the cpu time average for this particular process,
  279. * which is about to change from up -> not up or vice versa.
  280. * p->lastupdate is the last time an updatecpu happened.
  281. *
  282. * The cpu time average is a decaying average that lasts
  283. * about D clock ticks. D is chosen to be approximately
  284. * the cpu time of a cpu-intensive "quick job". A job has to run
  285. * for approximately D clock ticks before we home in on its
  286. * actual cpu usage. Thus if you manage to get in and get out
  287. * quickly, you won't be penalized during your burst. Once you
  288. * start using your share of the cpu for more than about D
  289. * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
  290. * below all the other quick jobs. Interactive tasks, because
  291. * they basically always use less than their fair share of cpu,
  292. * will be rewarded.
  293. *
  294. * If the process has not been running, then we want to
  295. * apply the filter
  296. *
  297. * cpu = cpu * (D-1)/D
  298. *
  299. * n times, yielding
  300. *
  301. * cpu = cpu * ((D-1)/D)^n
  302. *
  303. * but D is big enough that this is approximately
  304. *
  305. * cpu = cpu * (D-n)/D
  306. *
  307. * so we use that instead.
  308. *
  309. * If the process has been running, we apply the filter to
  310. * 1 - cpu, yielding a similar equation. Note that cpu is
  311. * stored in fixed point (* 1000).
  312. *
  313. * Updatecpu must be called before changing up, in order
  314. * to maintain accurate cpu usage statistics. It can be called
  315. * at any time to bring the stats for a given proc up-to-date.
  316. */
static void
updatecpu(Proc *p)
{
	Proc *up = externup();
	int D, n, t, ocpu;

	/* EDF procs are scheduled by deadline, not by decayed cpu usage */
	if(p->edf)
		return;

	/* elapsed scaled ticks since the last update of p */
	t = MACHP(0)->ticks*Scaling + Scaling/2;
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	if(machp()->sch == nil)	/* may happen during boot */
		return;

	D = machp()->sch->schedgain*HZ*Scaling;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		/* idle for n ticks: decay toward 0 — cpu *= (D-n)/D */
		p->cpu = (ocpu*(D-n))/D;
	else{
		/* running for n ticks: decay (1 - cpu) toward 0, i.e. cpu rises toward 1000 */
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}

	//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
}
  344. /*
  345. * On average, p has used p->cpu of a cpu recently.
  346. * Its fair share is conf.nmach/machp()->load of a cpu. If it has been getting
  347. * too much, penalize it. If it has been getting not enough, reward it.
  348. * I don't think you can get much more than your fair share that
  349. * often, so most of the queues are for using less. Having a priority
  350. * of 3 means you're just right. Having a higher priority (up to p->basepri)
  351. * means you're not using as much as you could.
  352. */
static int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = MACHP(0)->load;
	if(load == 0)
		return p->basepri;

	/*
	 * fairshare = 1.000 * conf.nproc * 1.000/load,
	 * except the decimal point is moved three places
	 * on both load and fairshare.
	 */
	fairshare = (sys->nmach*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	/* rounded ratio of fair share to actual usage, capped at basepri */
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");
	//iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}
  377. /*
  378. * add a process to a scheduling queue
  379. */
static void
queueproc(Sched *sch, Schedq *rq, Proc *p, int locked)
{
	int pri;

	/* the queue's index in runq[] is its priority */
	pri = rq - sch->runq;
	if(!locked)
		lock(&sch->l);
	else if(canlock(&sch->l))
		panic("queueproc: locked and can lock");	/* caller lied about holding sch->l */
	p->priority = pri;
	/* append p at the tail of the queue */
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	sch->nrdy++;
	sch->runvec |= 1<<pri;	/* mark this priority level non-empty */
	if(!locked)
		unlock(&sch->l);
}
  402. /*
  403. * try to remove a process from a scheduling queue (called splhi)
  404. */
Proc*
dequeueproc(Sched *sch, Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	/* only try the lock; caller retries on failure */
	if(!canlock(&sch->l))
		return nil;

	/*
	 *  the queue may have changed before we locked runq,
	 *  refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 *  p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(&sch->l);
		return nil;
	}
	/* unlink p: l is its predecessor, or nil if p is the head */
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		sch->runvec &= ~(1<<(rq-sch->runq));	/* level now empty */
	rq->n--;
	sch->nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %d %s\n", p->text, p->pid, statename[p->state]);

	unlock(&sch->l);
	return p;
}
/*
 * Mark p Ready and queue it on sch at a freshly computed priority.
 * locked is passed through to queueproc: nonzero means the caller
 * already holds sch->l.
 */
static void
schedready(Sched *sch, Proc *p, int locked)
{
	Mpl pl;
	int pri;
	Schedq *rq;
	Proc *up;

	pl = splhi();
	/* EDF procs are made runnable by the EDF machinery instead */
	if(edfready(p)){
		splx(pl);
		return;
	}

	/* no ACs yet, maybe never.
	if(machp()->nixtype == NIXAC)
		MACHP(0)->readied = p;
	 */

	/*
	 * BUG: if schedready is called to rebalance the scheduler,
	 * for another core, then this is wrong.
	 */
	up = externup();
	if(up != p)
		machp()->readied = p;	/* group scheduling */
	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &sch->runq[pri];
	p->state = Ready;
	queueproc(sch, rq, p, locked);
	if(p->trace)
		proctrace(p, SReady, 0);
	splx(pl);
}
  476. /*
  477. * ready(p) picks a new priority for a process and sticks it in the
  478. * runq for that priority.
  479. */
  480. void
  481. ready(Proc *p)
  482. {
  483. schedready(procsched(p), p, 0);
  484. }
  485. /*
  486. * yield the processor and drop our priority
  487. */
  488. void
  489. yield(void)
  490. {
  491. Proc *up = externup();
  492. if(anyready()){
  493. /* pretend we just used 1/2 tick */
  494. up->lastupdate -= Scaling/2;
  495. sched();
  496. }
  497. }
  498. /*
  499. * recalculate priorities once a second. We need to do this
  500. * since priorities will otherwise only be recalculated when
  501. * the running process blocks.
  502. */
static void
rebalance(void)
{
	Mpl pl;
	int pri, npri;
	int64_t t;
	Sched *sch;
	Schedq *rq;
	Proc *p;

	sch = machp()->sch;
	/* run at most once per second */
	t = machp()->ticks;
	if(t - sch->balancetime < HZ)
		return;
	sch->balancetime = t;

	for(pri=0, rq=sch->runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		/* only touch procs whose affinity is this cpu */
		if(p->mp != MACHP(machp()->machno))
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			/* move head to its new queue, then re-examine this queue */
			pl = splhi();
			p = dequeueproc(sch, rq, p);
			if(p)
				queueproc(sch, &sch->runq[npri], p, 0);
			splx(pl);
			goto another;
		}
	}
}
  538. /*
  539. * Is this scheduler overloaded?
  540. * should it pass processes to any other underloaded scheduler?
  541. */
  542. static int
  543. overloaded(Sched *sch)
  544. {
  545. return sch->nmach != 0 && sch->nrdy > sch->nmach;
  546. }
/* disabled: donation/IPI machinery, kept for reference */
#if 0
/*
 * Is it reasonable to give processes to this scheduler?
 */
static int
underloaded(Sched *sch)
{
	return sch->nrdy < sch->nmach;
}

/*
 * Interrupt every online mach that uses sch, so each one
 * gets a chance to reschedule.
 */
static void
ipisched(Sched *sch)
{
	Mach* mp;
	int i;

	for(i = 0; i < MACHMAX; i++){
		mp = sys->machptr[i];
		if(mp != nil && mp != machp() && mp->online && mp->sch == sch)
			apicipi(mp->apicno);
	}
}
#endif
  568. /*
  569. * If we are idle, check if another scheduler is overloaded and
  570. * steal a new process from it. But steal low priority processes to
  571. * avoid disturbing high priority ones.
  572. */
static Proc*
steal(void)
{
	static int last;	/* donate in round robin */
	int start, i;
	Schedq *rq;
	Sched *sch;
	Proc *p;

	/*
	 * measures show that stealing is expensive, we are donating
	 * by now but only when calling exec(). See maydonate().
	 */
	if(!schedsteals)
		return nil;

	/* scan all schedulers round-robin, starting after the last victim */
	start = last;
	for(i = 0; i < Nsched; i++){
		last = (start+i)%Nsched;
		sch = &run[last];
		if(sch == machp()->sch || sch->nmach == 0 || !overloaded(sch))
			continue;
		/* take the first stealable proc, scanning from high to low priority */
		for(rq = &sch->runq[Nrq-1]; rq >= sch->runq; rq--){
			for(p = rq->head; p != nil; p = p->rnext)
				if(!p->wired && p->priority < PriKproc)
					break;
			/* dequeueproc may fail under contention; then try lower queues */
			if(p != nil && dequeueproc(sch, rq, p) != nil)
				return p;
		}
	}
	return nil;
}
  603. /*
  604. * pick a process to run
  605. */
Proc*
runproc(void)
{
	Schedq *rq;
	Sched *sch;
	Proc *p;
	uint64_t start, now;
	int i;

	start = perfticks();
	sch = machp()->sch;
	/* cooperative scheduling until the clock ticks */
	if((p=machp()->readied) && p->mach==0 && p->state==Ready
	&& sch->runq[Nrq-1].head == nil && sch->runq[Nrq-2].head == nil
	&& (!p->wired || p->wired == machp())){
		sch->skipscheds++;
		rq = &sch->runq[p->priority];
		goto found;
	}

	sch->preempts++;

loop:
	/*
	 *  find a process that last ran on this processor (affinity),
	 *  or one that hasn't moved in a while (load balancing).  Every
	 *  time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 *  find the highest priority target process that this
		 *  processor can run given affinity constraints.
		 *
		 */
		for(rq = &sch->runq[Nrq-1]; rq >= sch->runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				/* first pass (i==0) honors affinity; later passes take any unwired proc */
				if(p->mp == nil || p->mp == MACHP(machp()->machno)
				|| (!p->wired && i > 0))
					goto found;
			}
		}

		/* nothing local: try another scheduler's queues */
		p = steal();
		if(p != nil){
			splhi();
			goto stolen;
		}
		/* waste time or halt the CPU */
		idlehands();
		/* remember how much time we're here */
		now = perfticks();
		machp()->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	p = dequeueproc(sch, rq, p);
	if(p == nil)	/* lost the race for p; look again */
		goto loop;
stolen:
	p->state = Scheding;
	p->mp = MACHP(machp()->machno);

	if(edflock(p)){
		edfrun(p, rq == &sch->runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	if(p->trace)
		proctrace(p, SRun, 0);
	return p;
}
  673. int
  674. canpage(Proc *p)
  675. {
  676. int ok;
  677. Sched *sch;
  678. splhi();
  679. sch = procsched(p);
  680. lock(&sch->l);
  681. /* Only reliable way to see if we are Running */
  682. if(p->mach == 0) {
  683. p->newtlb = 1;
  684. ok = 1;
  685. }
  686. else
  687. ok = 0;
  688. unlock(&sch->l);
  689. spllo();
  690. return ok;
  691. }
/*
 * Allocate and initialize a fresh Proc in state Scheding.
 * Inherits syscall tracing from the creator; everything else
 * starts from a clean slate.
 */
Proc*
newproc(void)
{
	Proc *up = externup();
	Proc *p;

	p = psalloc();

	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->kp = 0;
	/* child of a traced process starts traced too */
	if(up != nil && up->procctl == Proc_tracesyscall)
		p->procctl = Proc_tracesyscall;
	else
		p->procctl = 0;
	p->syscalltrace = nil;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	/* allocate ids, then hash so the proc is findable by pid */
	p->pid = incref(&pidalloc);
	pshash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid <= 0 || p->noteid <= 0)
		panic("pidalloc");
	/* kernel stacks are reused across proc slots */
	if(p->kstack == 0)
		p->kstack = smalloc(KSTACK);

	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = MACHP(0)->ticks*Scaling;
	p->edf = nil;

	p->ntrap = 0;
	p->nintr = 0;
	p->nsyscall = 0;
	p->nactrap = 0;
	p->nacsyscall = 0;
	p->nicc = 0;
	p->actime = 0ULL;
	p->tctime = 0ULL;
	p->ac = nil;
	p->nfullq = 0;
	memset(&p->MMU, 0, sizeof p->MMU);
	return p;
}
  761. /*
  762. * wire this proc to a machine
  763. */
  764. void
  765. procwired(Proc *p, int bm)
  766. {
  767. Proc *up = externup();
  768. Proc *pp;
  769. int i;
  770. char nwired[MACHMAX];
  771. Mach *wm;
  772. if(bm < 0){
  773. /* pick a machine to wire to */
  774. memset(nwired, 0, sizeof(nwired));
  775. p->wired = 0;
  776. for(i=0; (pp = psincref(i)) != nil; i++){
  777. wm = pp->wired;
  778. if(wm && pp->pid)
  779. nwired[machp()->machno]++;
  780. psdecref(pp);
  781. }
  782. bm = 0;
  783. for(i=0; i<sys->nmach; i++)
  784. if(nwired[i] < nwired[bm])
  785. bm = i;
  786. } else {
  787. /* use the virtual machine requested */
  788. bm = bm % sys->nmach;
  789. }
  790. p->wired = MACHP(bm);
  791. p->mp = p->wired;
  792. /*
  793. * adjust our color to the new domain.
  794. */
  795. if(up == nil || p != up)
  796. return;
  797. up->color = corecolor(up->mp->machno);
  798. qlock(&up->seglock);
  799. for(i = 0; i < NSEG; i++)
  800. if(up->seg[i])
  801. up->seg[i]->color = up->color;
  802. qunlock(&up->seglock);
  803. }
  804. void
  805. procpriority(Proc *p, int pri, int fixed)
  806. {
  807. if(pri >= Npriq)
  808. pri = Npriq - 1;
  809. else if(pri < 0)
  810. pri = 0;
  811. p->basepri = pri;
  812. p->priority = pri;
  813. if(fixed){
  814. p->fixedpri = 1;
  815. } else {
  816. p->fixedpri = 0;
  817. }
  818. }
  819. /*
  820. * sleep if a condition is not true. Another process will
  821. * awaken us after it sets the condition. When we awaken
  822. * the condition may no longer be true.
  823. *
  824. * we lock both the process and the rendezvous to keep r->p
  825. * and p->r synchronized.
  826. */
  827. void
  828. sleep(Rendez *r, int (*f)(void*), void *arg)
  829. {
  830. Proc *up = externup();
  831. Mpl pl;
  832. pl = splhi();
  833. if(up->nlocks)
  834. print("process %d sleeps with %d locks held, last lock %#p locked at pc %#p, sleep called from %#p\n",
  835. up->pid, up->nlocks, up->lastlock, 0/*up->lastlock->pc*/, getcallerpc());
  836. lock(&r->l);
  837. lock(&up->rlock);
  838. if(r->_p){
  839. print("double sleep called from %#p, %d %d\n",
  840. getcallerpc(&r), r->_p->pid, up->pid);
  841. dumpstack();
  842. }
  843. /*
  844. * Wakeup only knows there may be something to do by testing
  845. * r->p in order to get something to lock on.
  846. * Flush that information out to memory in case the sleep is
  847. * committed.
  848. */
  849. r->_p = up;
  850. if((*f)(arg) || up->notepending){
  851. /*
  852. * if condition happened or a note is pending
  853. * never mind
  854. */
  855. r->_p = nil;
  856. unlock(&up->rlock);
  857. unlock(&r->l);
  858. } else {
  859. /*
  860. * now we are committed to
  861. * change state and call scheduler
  862. */
  863. if(up->trace)
  864. proctrace(up, SSleep, 0);
  865. up->state = Wakeme;
  866. up->r = r;
  867. /* statistics */
  868. machp()->cs++;
  869. procsave(up);
  870. mmuflushtlb();
  871. if(setlabel(&up->sched)) {
  872. /*
  873. * here when the process is awakened
  874. */
  875. procrestore(up);
  876. spllo();
  877. } else {
  878. /*
  879. * here to go to sleep (i.e. stop Running)
  880. */
  881. unlock(&up->rlock);
  882. unlock(&r->l);
  883. gotolabel(&machp()->sched);
  884. }
  885. }
  886. if(up->notepending) {
  887. up->notepending = 0;
  888. splx(pl);
  889. if(up->procctl == Proc_exitme && up->closingfgrp)
  890. forceclosefgrp();
  891. error(Eintr);
  892. }
  893. splx(pl);
  894. }
  895. static int
  896. tfn(void *arg)
  897. {
  898. Proc *up = externup();
  899. return up->trend == nil || up->tfn(arg);
  900. }
/*
 * Timer callback for tsleep(): clear trend so tfn() reports the
 * timeout, then wake the sleeper.
 * NOTE(review): trend is read and cleared without a lock here;
 * presumably serialized against tsleep by the timer machinery —
 * confirm before relying on it.
 */
void
twakeup(Ureg*_, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}
/*
 * Sleep on r until fn(arg) is true or ms milliseconds elapse.
 * Uses up->Timer with twakeup to bound the sleep; always removes
 * the timer again, including on the error path.
 */
void
tsleep(Rendez *r, int (*fn)(void*), void *arg, int32_t ms)
{
	Proc *up = externup();

	if (up->Timer.tt){
		/* a previous timer is still active; remove it first */
		print("tsleep: timer active: mode %d, tf %#p\n",
			up->Timer.tmode, up->Timer.tf);
		timerdel(&up->Timer);
	}
	up->Timer.tns = MS2NS(ms);
	up->Timer.tf = twakeup;
	up->Timer.tmode = Trelative;
	up->Timer.ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(&up->Timer);

	if(waserror()){
		/* interrupted: don't leave the timer armed */
		timerdel(&up->Timer);
		nexterror();
	}
	sleep(r, tfn, arg);
	if (up->Timer.tt)
		timerdel(&up->Timer);
	up->Timer.twhen = 0;
	poperror();
}
  938. /*
  939. * Expects that only one process can call wakeup for any given Rendez.
  940. * We hold both locks to ensure that r->p and p->r remain consistent.
  941. * Richard Miller has a better solution that doesn't require both to
  942. * be held simultaneously, but I'm a paranoid - presotto.
  943. */
Proc*
wakeup(Rendez *r)
{
	Mpl pl;
	Proc *p;

	pl = splhi();

	lock(&r->l);
	p = r->_p;

	if(p != nil){
		lock(&p->rlock);
		/* sleep() committed p to Wakeme on r before releasing these locks */
		if(p->state != Wakeme || p->r != r)
			panic("wakeup: state");
		r->_p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(&r->l);

	splx(pl);

	/* returns the awakened proc, or nil if nobody was sleeping */
	return p;
}
  965. /*
  966. * if waking a sleeping process, this routine must hold both
  967. * p->rlock and r->lock. However, it can't know them in
  968. * the same order as wakeup causing a possible lock ordering
  969. * deadlock. We break the deadlock by giving up the p->rlock
  970. * lock if we can't get the r->lock and retrying.
  971. */
/*
 * Post note n (with flag) to process p.  Returns 1 if the note was
 * queued, 0 if p's note buffer was full.  A non-user note sent to a
 * process with no handler (or already in one) first flushes the
 * pending queue.  If p is blocked in sleep(), a rendezvous, or
 * semacquire, it is pulled out so it can see the note.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	Mpl pl;
	int ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);

	/* non-user note to an unhandled/notified process discards the queue */
	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;

	/* NIX */
	if(p->state == Exotic){
		/* it could be that the process is not running in the
		 * AC when we interrupt the AC, but then we'd only get
		 * an extra interrupt in the AC, and nothing should
		 * happen.
		 */
		intrac(p);
	}

	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		pl = splhi();
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no — leave the loop still holding p->rlock */

		/* try for the second lock */
		if(canlock(&r->l)){
			if(p->state != Wakeme || r->_p != p)
				panic("postnote: state %d %d %d", r->_p != p, p->r != r, p->state);
			p->r = nil;
			r->_p = nil;
			ready(p);
			unlock(&r->l);
			break;	/* p->rlock still held; released below */
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(pl);
		sched();
	}
	unlock(&p->rlock);
	splx(pl);

	if(p->state != Rendezvous){
		if(p->state == Semdown)
			ready(p);
		return ret;
	}

	/* Try and pull out of a rendezvous */
	lock(&p->rgrp->r.l);
	if(p->state == Rendezvous) {
		p->rendval = ~0;	/* the interrupted rendezvous returns ~0 */
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;	/* unlink p from the tag hash chain */
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(&p->rgrp->r.l);
	return ret;
}
/*
 * weird thing: keep at most NBROKEN around
 */
#define NBROKEN 4
/*
 * FIFO of processes that exited without freeing memory (see pexit's
 * addbroken call) and are parked in the Broken state for debugging;
 * when full, the oldest entry is released to make room.
 */
struct
{
	QLock QLock;		/* guards n and p[] */
	int n;			/* number of parked processes */
	Proc *p[NBROKEN];	/* oldest first */
}broken;
/*
 * Park process p (in practice p == up; see pexit) on the broken list
 * so a debugger can examine it.  If the list is full, the oldest
 * entry is made runnable to free a slot.  Switches away via sched()
 * with state Broken; execution resumes only after unbreak() or
 * freebroken() readies the process again.
 */
void
addbroken(Proc *p)
{
	Proc *up = externup();

	qlock(&broken.QLock);
	if(broken.n == NBROKEN) {
		/* evict the oldest broken process to make room */
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken.QLock);

	stopac();
	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}
  1076. void
  1077. unbreak(Proc *p)
  1078. {
  1079. int b;
  1080. qlock(&broken.QLock);
  1081. for(b=0; b < broken.n; b++)
  1082. if(broken.p[b] == p) {
  1083. broken.n--;
  1084. memmove(&broken.p[b], &broken.p[b+1],
  1085. sizeof(Proc*)*(NBROKEN-(b+1)));
  1086. ready(p);
  1087. break;
  1088. }
  1089. qunlock(&broken.QLock);
  1090. }
  1091. int
  1092. freebroken(void)
  1093. {
  1094. int i, n;
  1095. qlock(&broken.QLock);
  1096. n = broken.n;
  1097. for(i=0; i<n; i++) {
  1098. ready(broken.p[i]);
  1099. broken.p[i] = 0;
  1100. }
  1101. broken.n = 0;
  1102. qunlock(&broken.QLock);
  1103. return n;
  1104. }
/*
 * Terminate the current process.  exitstr is the exit message left
 * for the parent; if freemem is 0 the process is parked on the
 * broken list instead of being fully reclaimed.  Releases all
 * groups, segments and wait records, notifies the parent (for
 * non-kernel processes), and finally switches away in the Moribund
 * state — this function does not return.
 */
void
pexit(char *exitstr, int freemem)
{
	Proc *up = externup();
	Proc *p;
	Segment **s, **es;
	uint64_t utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;

	/* disabled diagnostics for NIX accounting */
	if(0 && up->nfullq > 0)
		iprint(" %s=%d", up->text, up->nfullq);
	if(0 && up->nicc > 0)
		iprint(" [%s nicc %u tctime %llu actime %llu]\n",
			up->text, up->nicc, up->tctime, up->actime);
	if(up->syscalltrace != nil)
		free(up->syscalltrace);
	up->syscalltrace = nil;
	up->alarm = 0;

	if (up->Timer.tt)
		timerdel(&up->Timer);
	if(up->trace)
		proctrace(up, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	/* now free them outside the debug lock */
	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 * if not a kernel process and have a parent,
	 * do some housekeeping.
	 */
	if(up->kp == 0) {
		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		/* swallow any pending errors so smalloc can't unwind past here */
		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %d: %s",
				up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 * Check that parent is still alive.
		 */
		if(p->pid == up->parentpid && p->state != Broken) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;
			/*
			 * If the parent already has 128 or more wait
			 * records queued, don't leave another behind.
			 * This helps prevent badly written daemon
			 * processes from accumulating lots of wait
			 * records.
			 */
			if(p->nwait < 128) {
				wq->next = p->waitq;
				p->waitq = wq;
				p->nwait++;
				wq = nil;	/* ownership passed to the parent */
				wakeup(&p->waitr);
			}
		}
		unlock(&p->exl);
		if(wq)
			free(wq);
	}

	if(!freemem)
		addbroken(up);	/* parks here until a debugger releases us */

	/* drop all memory segments */
	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	psunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	/* discard wait records from our own (now orphaned) children */
	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc.l);
	lock(&pga.l);

	stopac();
	//stopnixproc();
	edfstop(up);
	up->state = Moribund;
	sched();
	panic("pexit");
}
  1244. int
  1245. haswaitq(void *x)
  1246. {
  1247. Proc *p;
  1248. p = (Proc *)x;
  1249. return p->waitq != 0;
  1250. }
/*
 * Wait for a child to exit.  Copies the child's Waitmsg into w (if
 * non-nil) and returns its pid.  Raises Einuse when another wait is
 * already in progress for this process, and Enochild when there are
 * no children and no queued wait records.
 */
int
pwait(Waitmsg *w)
{
	Proc *up = externup();
	int cpid;
	Waitq *wq;

	/* only one outstanding wait per process */
	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	/* dequeue the first wait record */
	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);
	return cpid;
}
/*
 * Print a one-line summary of process p to the console.
 * No-op when p is nil.
 */
void
dumpaproc(Proc *p)
{
	uintptr bss;
	char *s;

	if(p == 0)
		return;

	bss = 0;
	/* bss reporting disabled pending a port decision on segment names */
	print("dumpaproc: what are HSEG and BSEG");
#if 0
	if(p->seg[HSEG])
		bss = p->seg[HSEG]->top;
	else if(p->seg[BSEG])
		bss = p->seg[BSEG]->top;
#endif

	/* prefer the transient psstate string; fall back to the state name */
	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3d:%10s pc %#p dbgpc %#p %8s (%s) ut %ld st %ld bss %#p qpc %#p nl %d nd %lu lpc %#p pri %lu\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks,
		p->delaysched, p->lastlock ? 0/*p->lastlock->pc*/ : 0, p->priority);
}
  1306. void
  1307. procdump(void)
  1308. {
  1309. Proc *up = externup();
  1310. int i;
  1311. Proc *p;
  1312. if(up)
  1313. print("up %d\n", up->pid);
  1314. else
  1315. print("no current process\n");
  1316. for(i=0; (p = psincref(i)) != nil; i++) {
  1317. if(p->state != Dead)
  1318. dumpaproc(p);
  1319. psdecref(p);
  1320. }
  1321. }
/*
 * wait till all processes have flushed their mmu
 * state about segment s
 */
void
procflushseg(Segment *s)
{
	int i, ns, nm, nwait;
	Proc *p;

	/*
	 * tell all processes with this
	 * segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; (p = psincref(i)) != nil; i++) {
		if(p->state == Dead){
			psdecref(p);
			continue;
		}
		for(ns = 0; ns < NSEG; ns++){
			if(p->seg[ns] == s){
				p->newtlb = 1;
				/* flag every core currently running this process */
				for(nm = 0; nm < sys->nmach; nm++){
					if(MACHP(nm)->proc == p){
						MACHP(nm)->mmuflush = 1;
						nwait++;
					}
				}
				break;
			}
		}
		psdecref(p);
	}

	if(nwait == 0)
		return;

	/*
	 * wait for all processors to take a clock interrupt
	 * and flush their mmu's.
	 * NIX BUG: this won't work if another core is in AC mode.
	 * In that case we must IPI it, but only if that core is
	 * using this segment.
	 */
	for(nm = 0; nm < sys->nmach; nm++)
		if(MACHP(nm) != machp())
			while(MACHP(nm)->mmuflush)
				sched();
}
  1369. void
  1370. scheddump(void)
  1371. {
  1372. Proc *p;
  1373. Sched *sch;
  1374. Schedq *rq;
  1375. for(sch = run; sch < &run[Nsched]; sch++){
  1376. for(rq = &sch->runq[Nrq-1]; rq >= sch->runq; rq--){
  1377. if(rq->head == 0)
  1378. continue;
  1379. print("sch%ld rq%ld:", sch - run, rq-sch->runq);
  1380. for(p = rq->head; p; p = p->rnext)
  1381. print(" %d(%lu)", p->pid, machp()->ticks - p->readytime);
  1382. print("\n");
  1383. delay(150);
  1384. }
  1385. print("sch%ld: nrdy %d\n", sch - run, sch->nrdy);
  1386. }
  1387. }
  1388. void
  1389. kproc(char *name, void (*func)(void *), void *arg)
  1390. {
  1391. Proc *up = externup();
  1392. Proc *p;
  1393. static Pgrp *kpgrp;
  1394. p = newproc();
  1395. p->psstate = 0;
  1396. p->procmode = 0640;
  1397. p->kp = 1;
  1398. p->noswap = 1;
  1399. p->scallnr = up->scallnr;
  1400. memmove(p->arg, up->arg, sizeof(up->arg));
  1401. p->nerrlab = 0;
  1402. p->slash = up->slash;
  1403. p->dot = up->dot;
  1404. if(p->dot)
  1405. incref(&p->dot->r);
  1406. memmove(p->note, up->note, sizeof(p->note));
  1407. p->nnote = up->nnote;
  1408. p->notified = 0;
  1409. p->lastnote = up->lastnote;
  1410. p->notify = up->notify;
  1411. p->ureg = 0;
  1412. p->dbgreg = 0;
  1413. procpriority(p, PriKproc, 0);
  1414. kprocchild(p, func, arg);
  1415. kstrdup(&p->user, eve);
  1416. kstrdup(&p->text, name);
  1417. if(kpgrp == 0)
  1418. kpgrp = newpgrp();
  1419. p->pgrp = kpgrp;
  1420. incref(&kpgrp->r);
  1421. memset(p->time, 0, sizeof(p->time));
  1422. p->time[TReal] = MACHP(0)->ticks;
  1423. ready(p);
  1424. /*
  1425. * since the bss/data segments are now shareable,
  1426. * any mmu info about this process is now stale
  1427. * and has to be discarded.
  1428. */
  1429. p->newtlb = 1;
  1430. mmuflush();
  1431. }
  1432. /*
  1433. * called splhi() by notify(). See comment in notify for the
  1434. * reasoning.
  1435. */
/*
 * Act on p->procctl for the current process: exit, stop for a
 * debugger, or migrate between time-sharing and application cores.
 */
void
procctl(Proc *p)
{
	Proc *up = externup();
	Mpl pl;
	char *state;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		pl = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();	/* back to splhi to change state and switch away */
		p->state = Stopped;
		sched();	/* blocks here until the debugger readies us */
		p->psstate = state;	/* restore the pre-stop status string */
		splx(pl);
		return;

	case Proc_toac:
		p->procctl = 0;
		/*
		 * This pretends to return from the system call,
		 * by moving to a core, but never returns (unless
		 * the process gets moved back to a TC.)
		 */
		spllo();
		runacore();
		return;

	case Proc_totc:
		p->procctl = 0;
		if(p != up)
			panic("procctl: stopac: p != up");
		spllo();
		stopac();
		return;
	}
}
/*
 * Raise error err: record it in up->errstr and unwind to the most
 * recent waserror() via nexterror().  Does not return.
 */
void
error(char *err)
{
	Proc *up = externup();

	spllo();	/* error recovery may take locks; don't run splhi */

	assert(up->nerrlab < NERR);
	kstrcpy(up->errstr, err, ERRMAX);
	/*
	 * NOTE(review): saves the raise site in the top (unused) error
	 * label slot — presumably for post-mortem debugging; confirm.
	 */
	setlabel(&up->errlab[NERR-1]);
	nexterror();
}
/*
 * Pop the most recent error label and jump to it — i.e. resume in
 * the body of the matching waserror().  Does not return.
 */
void
nexterror(void)
{
	Proc *up = externup();

	gotolabel(&up->errlab[--up->nerrlab]);
}
  1506. void
  1507. exhausted(char *resource)
  1508. {
  1509. char buf[ERRMAX];
  1510. sprint(buf, "no free %s", resource);
  1511. iprint("%s\n", buf);
  1512. error(buf);
  1513. }
/*
 * Kill the largest non-kernel process (by total segment size) to
 * reclaim memory; why is included in the console message.  Only
 * processes that are group/other-writable or not owned by eve are
 * eligible.  The victim's segment pages are freed immediately.
 */
void
killbig(char *why)
{
	int i, x;
	Segment *s;
	uint32_t l, max;
	Proc *p, *kp;

	/* pass 1: find the biggest eligible process */
	max = 0;
	kp = nil;
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		/* sum the address space of all segments */
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			if(kp != nil)
				psdecref(kp);
			kp = p;	/* keep the reference on the current candidate */
			max = l;
		}
		else
			psdecref(p);
	}
	if(kp == nil)
		return;

	print("%d: %s killed: %s\n", kp->pid, kp->text, why);
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		/*
		 * NOTE(review): unconditional panic — the BSEG-sharing pass
		 * below is commented out, so this loop panics on the first
		 * live user process.  Looks like an unfinished port; confirm
		 * before relying on killbig().
		 */
		panic("killbig");
//		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
//			p->procctl = Proc_exitbig;
		psdecref(p);
	}

	kp->procctl = Proc_exitbig;
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		/* free pages now so memory returns promptly; skip busy segments */
		if(s != 0 && canqlock(&s->lk)) {
			mfreeseg(s, s->base, (s->top - s->base)/BIGPGSZ);
			qunlock(&s->lk);
		}
	}
	psdecref(kp);
}
  1566. /*
  1567. * change ownership to 'new' of all processes owned by 'old'. Used when
  1568. * eve changes.
  1569. */
  1570. void
  1571. renameuser(char *old, char *new)
  1572. {
  1573. int i;
  1574. Proc *p;
  1575. for(i = 0; (p = psincref(i)) != nil; i++){
  1576. if(p->user!=nil && strcmp(old, p->user)==0)
  1577. kstrdup(&p->user, new);
  1578. psdecref(p);
  1579. }
  1580. }
  1581. /*
  1582. * time accounting called by clock() splhi'd
  1583. * only cpu0 computes system load average
  1584. */
void
accounttime(void)
{
	Sched *sch;
	Proc *p;
	uint32_t n, per;

	sch = machp()->sch;
	p = machp()->proc;
	if(p) {
		if(machp()->machno == 0)
			sch->nrun++;
		/* charge this tick to user or system time */
		p->time[p->insyscall]++;
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - machp()->perf.last;
	machp()->perf.last = n;
	/* exponential decay with time constant ~HZ ticks */
	per = (machp()->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		machp()->perf.period = per;

	machp()->perf.avg_inidle = (machp()->perf.avg_inidle*(HZ-1)+machp()->perf.inidle)/HZ;
	machp()->perf.inidle = 0;

	machp()->perf.avg_inintr = (machp()->perf.avg_inintr*(HZ-1)+machp()->perf.inintr)/HZ;
	machp()->perf.inintr = 0;

	/* only one processor gets to compute system load averages */
	if(machp()->machno != 0)
		return;

	/*
	 * calculate decaying load average.
	 * if we decay by (n-1)/n then it takes
	 * n clock ticks to go from load L to .36 L once
	 * things quiet down. it takes about 5 n clock
	 * ticks to go to zero. so using HZ means this is
	 * approximately the load over the last second,
	 * with a tail lasting about 5 seconds.
	 */
	n = sch->nrun;
	sch->nrun = 0;
	n = (sch->nrdy+n)*1000;	/* load is scaled by 1000 */
	machp()->load = (machp()->load*(HZ-1)+n)/HZ;
}
  1626. void
  1627. halt(void)
  1628. {
  1629. if(machp()->sch->nrdy != 0)
  1630. return;
  1631. hardhalt();
  1632. }