proc.c
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

#include <u.h>
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"

#include "../port/error.h"
#include "../port/edf.h"
#include "errstr.h"
#include <trace.h>

enum
{
	Scaling = 2,

	AMPmincores = 5,
};

Ref	noteidalloc;

static Ref pidalloc;

static Sched run;

struct Procalloc procalloc;

extern Proc* psalloc(void);
extern void pshash(Proc*);
extern void psrelease(Proc*);
extern void psunhash(Proc*);

static int reprioritize(Proc*);
static void updatecpu(Proc*);

static void rebalance(void);

char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
	"Exotic",
	"Down",
};

Sched*
procsched(Proc *)
{
	return &run;
}

/*
 * bad planning, once more.
 */
void
procinit0(void)
{
	run.schedgain = 30;
}

/*
 * Always splhi()'ed.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	m->inidle = 1;
	m->proc = nil;
	ainc(&run.nmach);

	setlabel(&m->sched);
	if(up) {
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		m->qstart = 0;
		m->qexpired = 0;
		coherence();
		m->proc = 0;
		switch(up->state) {
		case Running:
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			stopac();
			edfstop(up);
			if(up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	pga
			 */
			mmurelease(up);
			unlock(&pga);
			psrelease(up);
			unlock(&procalloc);
			break;
		}
		up->mach = nil;
		updatecpu(up);
		up = nil;
	}
	sched();
}

/*
 * Check if the stack has more than 4*KiB free.
 * Do not call panic, the stack is gigantic.
 */
static void
stackok(void)
{
	char dummy;

	if(&dummy < (char*)up->kstack + 4*KiB){
		print("tc kernel stack overflow, cpu%d stopped\n", m->machno);
		DONE();
	}
}

/*
 * If changing this routine, look also at sleep(). It
 * contains a copy of the guts of sched().
 */
void
sched(void)
{
	Proc *p;

	if(m->ilockdepth)
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
			m->machno,
			m->ilockdepth,
			up? up->lastilock: nil,
			(up && up->lastilock)? up->lastilock->pc: 0,
			getcallerpc(&p+2));

	if(up){
		/*
		 * Delay the sched until the process gives up the locks
		 * it is holding.  This avoids dumb lock loops.
		 * Don't delay if the process is Moribund.
		 * It called sched to die.
		 * But do sched eventually.  This avoids a missing unlock
		 * from hanging the entire kernel.
		 * But don't reschedule procs holding palloc or procalloc.
		 * Those are far too important to be holding while asleep.
		 *
		 * This test is not exact.  There can still be a few
		 * instructions in the middle of taslock when a process
		 * holds a lock but Lock.p has not yet been initialized.
		 */
		if(up->nlocks)
		if(up->state != Moribund)
		if(up->delaysched < 20
		|| pga.Lock.p == up
		|| procalloc.Lock.p == up){
			up->delaysched++;
			run.delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();
		/* statistics */
		if(up->nqtrap == 0 && up->nqsyscall == 0)
			up->nfullq++;
		m->cs++;

		stackok();
		procsave(up);
		mmuflushtlb(m->pml4->pa);
		if(setlabel(&up->sched)){
			procrestore(up);
			spllo();
			return;
		}
		gotolabel(&m->sched);
	}

	m->inidle = 1;
	p = runproc();	/* core 0 never returns */
	m->inidle = 0;

	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	up = p;
	m->qstart = m->ticks;
	up->nqtrap = 0;
	up->nqsyscall = 0;
	up->state = Running;
	up->mach = m;
	m->proc = up;
	mmuswitch(up);

	assert(!up->wired || up->wired == m);
	gotolabel(&up->sched);
}
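
/*
 * anyready and anyhigher consult run.runvec, the bit vector with
 * bit i set while runq[i] is non-empty; anyhigher masks off all
 * priorities at or below up->priority.
 */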
int
anyready(void)
{
	return run.runvec;
}

int
anyhigher(void)
{
	return run.runvec & ~((1<<(up->priority+1))-1);
}

/*
 * here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	/* once a second, rebalance will reprioritize ready procs */
	if(m->machno == 0){
		rebalance();
		return;
	}

	/* with AMPmincores or fewer cores we use SMP, and core 0
	 * does not set qexpired for us */
	if(sys->nmach <= AMPmincores)
		if(m->ticks - m->qstart >= HZ/10)
			m->qexpired = 1;

	/* unless preempted, get to run */
	if(m->qexpired && anyready())
		up->delaysched++;
}

/*
 * here at the end of non-clock interrupts to see if we should preempt the
 * current process.  Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		/*
		 * Core 0 is dispatching all interrupts, so no core
		 * actually running a user process is ever going to call
		 * preempted, unless we consider IPIs for preemption or we
		 * distribute interrupts.  But we are going to use SMP for
		 * machines with few cores.
		panic("preempted used");
		 */

		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}

/*
 * Update the cpu time average for this particular process,
 * which is about to change from up -> not up or vice versa.
 * p->lastupdate is the last time an updatecpu happened.
 *
 * The cpu time average is a decaying average that lasts
 * about D clock ticks.  D is chosen to be approximately
 * the cpu time of a cpu-intensive "quick job".  A job has to run
 * for approximately D clock ticks before we home in on its
 * actual cpu usage.  Thus if you manage to get in and get out
 * quickly, you won't be penalized during your burst.  Once you
 * start using your share of the cpu for more than about D
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 * below all the other quick jobs.  Interactive tasks, because
 * they basically always use less than their fair share of cpu,
 * will be rewarded.
 *
 * If the process has not been running, then we want to
 * apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 * n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 * but D is big enough that this is approximately
 *
 *	cpu = cpu * (D-n)/D
 *
 * so we use that instead.
 *
 * If the process has been running, we apply the filter to
 * 1 - cpu, yielding a similar equation.  Note that cpu is
 * stored in fixed point (* 1000).
 *
 * Updatecpu must be called before changing up, in order
 * to maintain accurate cpu usage statistics.  It can be called
 * at any time to bring the stats for a given proc up-to-date.
 */
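
/*
 * Worked example (assuming HZ = 100 purely for illustration):
 * with schedgain 30 and Scaling 2, D = 30*100*2 = 6000 scaled
 * ticks.  A process that has been running continuously for
 * n = 600 scaled ticks starting from cpu 0 gets
 * cpu = 1000 - 1000*(6000-600)/6000 = 100.
 */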
static void
updatecpu(Proc *p)
{
	int D, n, t, ocpu;

	if(p->edf)
		return;

	t = sys->ticks*Scaling + Scaling/2;
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	D = run.schedgain*HZ*Scaling;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		p->cpu = (ocpu*(D-n))/D;
	else{
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}
//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid, p==up?"active":"inactive", n, ocpu, p->cpu);
}

/*
 * On average, p has used p->cpu of a cpu recently.
 * Its fair share is nmach/m->load of a cpu.  If it has been getting
 * too much, penalize it.  If it has been getting not enough, reward it.
 * I don't think you can get much more than your fair share that
 * often, so most of the queues are for using less.  Having a priority
 * of 3 means you're just right.  Having a higher priority (up to p->basepri)
 * means you're not using as much as you could.
 */
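
/*
 * Worked example: with 4 machs and sys->load 2000 (about two
 * ready procs), fairshare = 4*1000*1000/2000 = 2000.  A proc
 * with p->cpu = 500 gets ratio = (2000+250)/500 = 4, clamped
 * down to p->basepri when that is lower.
 */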
static int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = sys->load;
	if(load == 0)
		return p->basepri;

	/*
	 * fairshare = 1.000 * conf.nproc * 1.000/load,
	 * except the decimal point is moved three places
	 * on both load and fairshare.
	 */
	fairshare = (sys->nmach*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");
//iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}

/*
 * add a process to a scheduling queue
 */
static void
queueproc(Sched *sch, Schedq *rq, Proc *p, int locked)
{
	int pri;

	pri = rq - sch->runq;
	if(!locked)
		lock(sch);
	else if(canlock(sch))
		panic("queueproc: locked and can lock");
	p->priority = pri;
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	sch->nrdy++;
	sch->runvec |= 1<<pri;
	if(!locked)
		unlock(sch);
}

/*
 * try to remove a process from a scheduling queue (called splhi)
 */
Proc*
dequeueproc(Sched *sch, Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	if(!canlock(sch))
		return nil;

	/*
	 * the queue may have changed before we locked runq,
	 * refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 * p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(sch);
		return nil;
	}
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		sch->runvec &= ~(1<<(rq-sch->runq));
	rq->n--;
	sch->nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %d %s\n", p->text, p->pid, statename[p->state]);

	unlock(sch);
	return p;
}

static void
schedready(Sched *sch, Proc *p, int locked)
{
	Mpl pl;
	int pri;
	Schedq *rq;

	pl = splhi();
	if(edfready(p)){
		splx(pl);
		return;
	}
	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &sch->runq[pri];
	p->state = Ready;
	queueproc(sch, rq, p, locked);
	if(p->trace)
		proctrace(p, SReady, 0);
	splx(pl);
}

/*
 * ready(p) picks a new priority for a process and sticks it in the
 * runq for that priority.
 */
void
ready(Proc *p)
{
	schedready(procsched(p), p, 0);
}

/*
 * yield the processor and drop our priority
 */
void
yield(void)
{
	if(anyready()){
		/* pretend we just used 1/2 tick */
		up->lastupdate -= Scaling/2;
		sched();
	}
}

/*
 * recalculate priorities once a second.  We need to do this
 * since priorities will otherwise only be recalculated when
 * the running process blocks.
 */
static void
rebalance(void)
{
	Mpl pl;
	int pri, npri, t;
	Schedq *rq;
	Proc *p;

	t = m->ticks;
	if(t - run.balancetime < HZ)
		return;
	run.balancetime = t;

	for(pri=0, rq=run.runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		if(p->mp != m)
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			pl = splhi();
			p = dequeueproc(&run, rq, p);
			if(p)
				queueproc(&run, &run.runq[npri], p, 0);
			splx(pl);
			goto another;
		}
	}
}

/*
 * Process p is ready to run, but there's no available core.
 * Try to make a core available by
 * 1. preempting a process with lower priority, or
 * 2. preempting one with the same priority that has run for more than HZ/10, or
 * 3. rescheduling one that has run for more than HZ, in the hope that its
 *    priority gets lowered.
 */
static void
preemptfor(Proc *p)
{
	uint32_t delta;
	uint i, j, rr;
	Proc *mup;
	Mach *mp;

	assert(m->machno == 0);
	/*
	 * try to preempt a lower priority process first, default back to
	 * round robin otherwise.
	 */
	for(rr = 0; rr < 2; rr++)
		for(i = 0; i < MACHMAX; i++){
			j = pickcore(p->color, i);
			if((mp = sys->machptr[j]) != nil && mp->online && mp->nixtype == NIXTC){
				if(mp == m)
					continue;
				/*
				 * Caution here: mp->proc can change, even die.
				 */
				mup = mp->proc;
				if(mup == nil)		/* one got idle */
					return;
				delta = mp->ticks - mp->qstart;
				if(mup->priority < p->priority){
					mp->qexpired = 1;
					return;
				}
				if(rr && mup->priority == p->priority && delta > HZ/10){
					mp->qexpired = 1;
					return;
				}
				if(rr && delta > HZ){
					mp->qexpired = 1;
					return;
				}
			}
		}
}

/*
 * Scheduling thread run as the main loop of cpu 0
 * Used in AMP sched.
 */
static void
mach0sched(void)
{
	Schedq *rq;
	Proc *p;
	Mach *mp;
	uint32_t start, now;
	int n, i, j;

	assert(m->machno == 0);
	acmodeset(NIXKC);		/* we don't time share any more */
	n = 0;
	start = perfticks();
loop:
	/*
	 * find a ready process that we might run.
	 */
	spllo();
	for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--)
		for(p = rq->head; p; p = p->rnext){
			/*
			 * wired processes may only run when their core is available.
			 */
			if(p->wired != nil){
				if(p->wired->proc == nil)
					goto found;
				continue;
			}
			/*
			 * find a ready process that did run at an available core
			 * or one that has not moved for some time.
			 */
			if(p->mp == nil || p->mp->proc == nil || n > 0)
				goto found;
		}
	/* waste time or halt the CPU */
	idlehands();
	/* remember how much time we're here */
	now = perfticks();
	m->perf.inidle += now-start;
	start = now;
	n++;
	goto loop;

found:
	assert(m->machno == 0);
	splhi();
	/*
	 * find a core for this process, but honor wiring.
	 */
	mp = p->wired;
	if(mp != nil){
		if(mp->proc != nil)
			goto loop;
	}else{
		for(i = 0; i < MACHMAX; i++){
			j = pickcore(p->color, i);
			if((mp = sys->machptr[j]) != nil && mp->online && mp->nixtype == NIXTC)
				if(mp != m && mp->proc == nil)
					break;
		}
		if(i == MACHMAX){
			preemptfor(p);
			goto loop;
		}
	}

	p = dequeueproc(&run, rq, p);
	mp->proc = p;
	if(p != nil){
		p->state = Scheding;
		p->mp = mp;
	}

	n = 0;
	goto loop;
}

/*
 * SMP performs better than AMP with few cores.
 * So, leave this here by now.  We should probably
 * write a unified version of runproc good enough for
 * both SMP and AMP.
 */
static Proc*
smprunproc(void)
{
	Schedq *rq;
	Proc *p;
	uint32_t start, now;
	int i;

	start = perfticks();
	run.preempts++;

loop:
	/*
	 * find a process that last ran on this processor (affinity),
	 * or one that hasn't moved in a while (load balancing).  Every
	 * time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 * find the highest priority target process that this
		 * processor can run given affinity constraints.
		 */
		for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				if(p->mp == nil || p->mp == sys->machptr[m->machno]
				|| (!p->wired && i > 0))
					goto found;
			}
		}

		/* waste time or halt the CPU */
		idlehands();
		/* remember how much time we're here */
		now = perfticks();
		m->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	p = dequeueproc(&run, rq, p);
	if(p == nil)
		goto loop;

	p->state = Scheding;
	p->mp = sys->machptr[m->machno];

	if(edflock(p)){
		edfrun(p, rq == &run.runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	if(p->trace)
		proctrace(p, SRun, 0);
	return p;
}

/*
 * pick a process to run.
 * most of this is used in AMP sched.
 * (with AMPmincores or fewer cores, we use SMP).
 * In the case of core 0 we always return nil, but
 * schedule the picked process at any other available TC.
 * In the case of other cores we wait until a process is given
 * by core 0.
 */
Proc*
runproc(void)
{
	Schedq *rq;
	Proc *p;
	uint32_t start, now;

	if(sys->nmach <= AMPmincores)
		return smprunproc();

	start = perfticks();
	run.preempts++;
	rq = nil;
	if(m->machno != 0){
		do{
			spllo();
			while(m->proc == nil)
				idlehands();
			now = perfticks();
			m->perf.inidle += now-start;
			start = now;
			splhi();
			p = m->proc;
		}while(p == nil);
		p->state = Scheding;
		p->mp = sys->machptr[m->machno];

		if(edflock(p)){
			edfrun(p, rq == &run.runq[PriEdf]);	/* start deadline timer and do admin */
			edfunlock();
		}
		if(p->trace)
			proctrace(p, SRun, 0);
		return p;
	}

	mach0sched();
	return nil;	/* not reached */
}
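
/*
 * canpage reports whether p's memory may be paged out: true only
 * when p is not loaded on any mach (p->mach == 0); it also marks
 * the proc so it reloads its tlb state when next run.
 */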
int
canpage(Proc *p)
{
	int ok;
	Sched *sch;

	splhi();
	sch = procsched(p);
	lock(sch);
	/* Only reliable way to see if we are Running */
	if(p->mach == 0) {
		p->newtlb = 1;
		ok = 1;
	}
	else
		ok = 0;
	unlock(sch);
	spllo();

	return ok;
}
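
/*
 * allocate a process structure and initialize it to a clean,
 * runnable-from-scratch state; the caller supplies segments,
 * groups and an entry point.
 */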
Proc*
newproc(void)
{
	Proc *p;

	p = psalloc();

	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->kp = 0;
	if(up != nil && up->procctl == Proc_tracesyscall)
		p->procctl = Proc_tracesyscall;
	else
		p->procctl = 0;
	p->syscalltrace = nil;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pshash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid <= 0 || p->noteid <= 0)
		panic("pidalloc");
	if(p->kstack == 0)
		p->kstack = smalloc(KSTACK);

	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = sys->ticks*Scaling;
	p->edf = nil;

	p->ntrap = 0;
	p->nintr = 0;
	p->nsyscall = 0;
	p->nactrap = 0;
	p->nacsyscall = 0;
	p->nicc = 0;
	p->actime = 0ULL;
	p->tctime = 0ULL;
	p->ac = nil;
	p->nfullq = 0;
	memset(&p->PMMU, 0, sizeof p->PMMU);

	return p;
}

/*
 * wire this proc to a machine
 */
void
procwired(Proc *p, int bm)
{
	Proc *pp;
	int i;
	char nwired[MACHMAX];
	Mach *wm;

	if(bm < 0){
		/* pick a machine to wire to */
		memset(nwired, 0, sizeof(nwired));
		p->wired = 0;
		for(i=0; (pp = psincref(i)) != nil; i++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
			psdecref(pp);
		}
		bm = 0;
		for(i=0; i<sys->nmach; i++)
			if(nwired[i] < nwired[bm])
				bm = i;
	} else {
		/* use the virtual machine requested */
		bm = bm % sys->nmach;
	}

	p->wired = sys->machptr[bm];
	p->mp = p->wired;

	/*
	 * adjust our color to the new domain.
	 */
	if(up == nil || p != up)
		return;
	up->color = corecolor(up->mp->machno);
	qlock(&up->seglock);
	for(i = 0; i < NSEG; i++)
		if(up->seg[i])
			up->seg[i]->color = up->color;
	qunlock(&up->seglock);
}
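
/*
 * set a process's base and current priority, clamped to the
 * valid queue range [0, Npriq-1]; fixed selects a fixed priority.
 */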
void
procpriority(Proc *p, int pri, int fixed)
{
	if(pri >= Npriq)
		pri = Npriq - 1;
	else if(pri < 0)
		pri = 0;
	p->basepri = pri;
	p->priority = pri;
	if(fixed){
		p->fixedpri = 1;
	} else {
		p->fixedpri = 0;
	}
}

/*
 * sleep if a condition is not true.  Another process will
 * awaken us after it sets the condition.  When we awaken
 * the condition may no longer be true.
 *
 * we lock both the process and the rendezvous to keep r->p
 * and p->r synchronized.
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	Mpl pl;

	pl = splhi();

	if(up->nlocks)
		print("process %d sleeps with %d locks held, last lock %#p locked at pc %#p, sleep called from %#p\n",
			up->pid, up->nlocks, up->lastlock, up->lastlock->pc, getcallerpc(&r));
	lock(r);
	lock(&up->rlock);
	if(r->p){
		print("double sleep called from %#p, %d %d\n",
			getcallerpc(&r), r->p->pid, up->pid);
		dumpstack();
	}

	/*
	 * Wakeup only knows there may be something to do by testing
	 * r->p in order to get something to lock on.
	 * Flush that information out to memory in case the sleep is
	 * committed.
	 */
	r->p = up;

	if((*f)(arg) || up->notepending){
		/*
		 * if condition happened or a note is pending
		 * never mind
		 */
		r->p = nil;
		unlock(&up->rlock);
		unlock(r);
	} else {
		/*
		 * now we are committed to
		 * change state and call scheduler
		 */
		if(up->trace)
			proctrace(up, SSleep, 0);
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		m->cs++;

		procsave(up);
		mmuflushtlb(m->pml4->pa);
		if(setlabel(&up->sched)) {
			/*
			 * here when the process is awakened
			 */
			procrestore(up);
			spllo();
		} else {
			/*
			 * here to go to sleep (i.e. stop Running)
			 */
			unlock(&up->rlock);
			unlock(r);
			gotolabel(&m->sched);
		}
	}

	if(up->notepending) {
		up->notepending = 0;
		splx(pl);
		if(up->procctl == Proc_exitme && up->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}

	splx(pl);
}

static int
tfn(void *arg)
{
	return up->trend == nil || up->tfn(arg);
}
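
/*
 * twakeup is the timer callback installed by tsleep: when the
 * timer fires it clears p->trend and wakes any sleeper on it.
 */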
void
twakeup(Ureg*, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}
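
/*
 * like sleep(), but with a timeout of ms milliseconds: a one-shot
 * timer fires twakeup, and the tfn wrapper above lets the sleep
 * end either when fn(arg) becomes true or when the timer fires.
 */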
void
tsleep(Rendez *r, int (*fn)(void*), void *arg, int32_t ms)
{
	if(up->tt){
		print("tsleep: timer active: mode %d, tf %#p\n",
			up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = MS2NS(ms);
	up->tf = twakeup;
	up->tmode = Trelative;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if(up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}

/*
 * Expects that only one process can call wakeup for any given Rendez.
 * We hold both locks to ensure that r->p and p->r remain consistent.
 * Richard Miller has a better solution that doesn't require both to
 * be held simultaneously, but I'm a paranoid - presotto.
 */
Proc*
wakeup(Rendez *r)
{
	Mpl pl;
	Proc *p;

	pl = splhi();

	lock(r);
	p = r->p;

	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r)
			panic("wakeup: state");
		r->p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(r);

	splx(pl);

	return p;
}

/*
 * if waking a sleeping process, this routine must hold both
 * p->rlock and r->lock.  However, it can't know them in
 * the same order as wakeup causing a possible lock ordering
 * deadlock.  We break the deadlock by giving up the p->rlock
 * lock if we can't get the r->lock and retrying.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	Mpl pl;
	int ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);

	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;

	/* NIX */
	if(p->state == Exotic){
		/* it could be that the process is not running
		 * in the AC when we interrupt the AC, but then
		 * we'd only get an extra interrupt in the AC, and
		 * nothing should happen.
		 */
		intrac(p);
	}

	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		pl = splhi();
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->p != p)
				panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
			p->r = nil;
			r->p = nil;
			ready(p);
			unlock(r);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(pl);
		sched();
	}
	unlock(&p->rlock);
	splx(pl);

	if(p->state != Rendezvous){
		if(p->state == Semdown)
			ready(p);
		return ret;
	}

	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);
	return ret;
}

/*
 * weird thing: keep at most NBROKEN around
 */
#define NBROKEN 4
struct
{
	QLock;
	int	n;
	Proc	*p[NBROKEN];
}broken;

void
addbroken(Proc *p)
{
	qlock(&broken);
	if(broken.n == NBROKEN) {
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken);

	stopac();
	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}

void
unbreak(Proc *p)
{
	int b;

	qlock(&broken);
	for(b=0; b < broken.n; b++)
		if(broken.p[b] == p) {
			broken.n--;
			memmove(&broken.p[b], &broken.p[b+1],
					sizeof(Proc*)*(NBROKEN-(b+1)));
			ready(p);
			break;
		}
	qunlock(&broken);
}

int
freebroken(void)
{
	int i, n;

	qlock(&broken);
	n = broken.n;
	for(i=0; i<n; i++) {
		ready(broken.p[i]);
		broken.p[i] = 0;
	}
	broken.n = 0;
	qunlock(&broken);
	return n;
}

void
pexit(char *exitstr, int freemem)
{
	Proc *p;
	Segment **s, **es;
	int32_t utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;

	if(0 && up->nfullq > 0)
		iprint(" %s=%d", up->text, up->nfullq);
	if(0 && up->nicc > 0)
		iprint(" [%s nicc %ud tctime %ulld actime %ulld]\n",
			up->text, up->nicc, up->tctime, up->actime);
	if(up->syscalltrace != nil)
		free(up->syscalltrace);
	up->syscalltrace = nil;
	up->alarm = 0;

	if(up->tt)
		timerdel(up);
	if(up->trace)
		proctrace(up, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 * if not a kernel process and have a parent,
	 * do some housekeeping.
	 */
	if(up->kp == 0) {
		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(sys->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %d: %s",
				up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 * Check that parent is still alive.
		 */
		if(p->pid == up->parentpid && p->state != Broken) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;
			/*
			 * If there would be more than 128 wait records
			 * outstanding for my parent, then don't leave a wait
			 * record behind.  This helps prevent badly written
			 * daemon processes from accumulating lots of wait
			 * records.
			 */
			if(p->nwait < 128) {
				wq->next = p->waitq;
				p->waitq = wq;
				p->nwait++;
				wq = nil;
				wakeup(&p->waitr);
			}
		}
		unlock(&p->exl);
		if(wq)
			free(wq);
	}

	if(!freemem)
		addbroken(up);

	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	psunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&pga);

	stopac();
	edfstop(up);
	up->state = Moribund;
	sched();
	panic("pexit");
}

int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}
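
/*
 * wait for a child to exit; copies the exit status into w (if
 * non-nil) and returns the child's pid.
 */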
int
pwait(Waitmsg *w)
{
	int cpid;
	Waitq *wq;

	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);
	return cpid;
}
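
/*
 * print a one-line debugging summary of process p.
 */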
void
dumpaproc(Proc *p)
{
	uintptr bss;
	char *s;

	if(p == 0)
		return;

	bss = 0;
	if(p->seg[HSEG])
		bss = p->seg[HSEG]->top;
	else if(p->seg[BSEG])
		bss = p->seg[BSEG]->top;

	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3d:%10s pc %#p dbgpc %#p %8s (%s) ut %ld st %ld bss %#p qpc %#p nl %d nd %lud lpc %#p pri %lud\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks,
		p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
}

void
procdump(void)
{
	int i;
	Proc *p;

	if(up)
		print("up %d\n", up->pid);
	else
		print("no current process\n");
	for(i=0; (p = psincref(i)) != nil; i++) {
		if(p->state != Dead)
			dumpaproc(p);
		psdecref(p);
	}
}

/*
 * wait till all processes have flushed their mmu
 * state about segment s
 */
void
procflushseg(Segment *s)
{
	int i, ns, nm, nwait;
	Proc *p;
	Mach *mp;

	/*
	 * tell all processes with this
	 * segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; (p = psincref(i)) != nil; i++) {
		if(p->state == Dead){
			psdecref(p);
			continue;
		}
		for(ns = 0; ns < NSEG; ns++){
			if(p->seg[ns] == s){
				p->newtlb = 1;
				for(nm = 0; nm < MACHMAX; nm++)
					if((mp = sys->machptr[nm]) != nil && mp->online)
						if(mp->proc == p){
							mp->mmuflush = 1;
							nwait++;
						}
				break;
			}
		}
		psdecref(p);
	}

	if(nwait == 0)
		return;

	/*
	 * wait for all processors to take a clock interrupt
	 * and flush their mmu's.
	 * NIX BUG: this won't work if another core is in AC mode.
	 * In that case we must IPI it, but only if that core is
	 * using this segment.
	 */
	for(i = 0; i < MACHMAX; i++)
		if((mp = sys->machptr[i]) != nil && mp->online)
			if(mp != m)
				while(mp->mmuflush)
					sched();
}

void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){
		if(rq->head == 0)
			continue;
		print("run[%ld]:", rq-run.runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %d(%lud)", p->pid, m->ticks - p->readytime);
		print("\n");
		delay(150);
	}
	print("nrdy %d\n", run.nrdy);
}
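
/*
 * create a kernel process running func(arg); it inherits dot,
 * slash and notes from the current process and is owned by eve.
 */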
void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;
	p->noswap = 1;

	p->scallnr = up->scallnr;
	memmove(p->arg, up->arg, sizeof(up->arg));
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	if(p->dot)
		incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = 0;
	p->dbgreg = 0;

	procpriority(p, PriKproc, 0);

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = sys->ticks;
	ready(p);

	/*
	 * since the bss/data segments are now shareable,
	 * any mmu info about this process is now stale
	 * and has to be discarded.
	 */
	p->newtlb = 1;
	mmuflush();
}

/*
 * called splhi() by notify().  See comment in notify for the
 * reasoning.
 */
void
procctl(Proc *p)
{
	Mpl pl;
	char *state;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		pl = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();
		p->state = Stopped;
		sched();
		p->psstate = state;
		splx(pl);
		return;

	case Proc_toac:
		p->procctl = 0;
		/*
		 * This pretends to return from the system call,
		 * by moving to a core, but never returns (unless
		 * the process gets moved back to a TC.)
		 */
		spllo();
		runacore();
		return;

	case Proc_totc:
		p->procctl = 0;
		if(p != up)
			panic("procctl: stopac: p != up");
		spllo();
		stopac();
		return;
	}
}
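
/*
 * record err as the current error string and unwind to the most
 * recent waserror() via nexterror.
 */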
void
error(char *err)
{
	spllo();

	assert(up->nerrlab < NERR);
	kstrcpy(up->errstr, err, ERRMAX);
	setlabel(&up->errlab[NERR-1]);
	nexterror();
}

void
nexterror(void)
{
	gotolabel(&up->errlab[--up->nerrlab]);
}

void
exhausted(char *resource)
{
	char buf[ERRMAX];

	sprint(buf, "no free %s", resource);
	iprint("%s\n", buf);
	error(buf);
}
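
/*
 * kill the non-kernel process with the largest memory footprint
 * (and any process sharing its bss segment), freeing its segment
 * pages; used when physical memory runs out.
 */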
void
killbig(char *why)
{
	int i, x;
	Segment *s;
	uint32_t l, max;
	Proc *p, *kp;

	max = 0;
	kp = nil;
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			if(kp != nil)
				psdecref(kp);
			kp = p;
			max = l;
		}
		else
			psdecref(p);
	}
	if(kp == nil)
		return;

	print("%d: %s killed: %s\n", kp->pid, kp->text, why);
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
			p->procctl = Proc_exitbig;
		psdecref(p);
	}

	kp->procctl = Proc_exitbig;
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		if(s != 0 && canqlock(&s->lk)) {
			mfreeseg(s, s->base, (s->top - s->base)/BIGPGSZ);
			qunlock(&s->lk);
		}
	}
	psdecref(kp);
}

/*
 * change ownership to 'new' of all processes owned by 'old'.  Used when
 * eve changes.
 */
void
renameuser(char *old, char *new)
{
	int i;
	Proc *p;

	for(i = 0; (p = psincref(i)) != nil; i++){
		if(p->user!=nil && strcmp(old, p->user)==0)
			kstrdup(&p->user, new);
		psdecref(p);
	}
}

/*
 * time accounting called by clock() splhi'd.
 * only cpu1 computes system load average,
 * but the system load average is accounted for cpu0.
 */
void
accounttime(void)
{
	Proc *p;
	uint32_t n, per;

	p = m->proc;
	if(p) {
		if(m->machno == 1)
			run.nrun++;
		p->time[p->insyscall]++;
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - m->perf.last;
	m->perf.last = n;
	per = (m->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		m->perf.period = per;

	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
	m->perf.inidle = 0;

	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
	m->perf.inintr = 0;

	/* only one processor gets to compute system load averages.
	 * it has to be mach 1 when we use AMP.
	 */
	if(sys->nmach > 1 && m->machno != 1)
		return;

	/*
	 * calculate decaying load average.
	 * if we decay by (n-1)/n then it takes
	 * n clock ticks to go from load L to .36 L once
	 * things quiet down.  it takes about 5 n clock
	 * ticks to go to zero.  so using HZ means this is
	 * approximately the load over the last second,
	 * with a tail lasting about 5 seconds.
	 */
	n = run.nrun;
	run.nrun = 0;
	n = (run.nrdy+n)*1000;
	sys->load = (sys->load*(HZ-1)+n)/HZ;
}
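
/*
 * halt the cpu until the next interrupt, but only when nothing
 * is ready to run.
 */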
void
halt(void)
{
	if(run.nrdy != 0)
		return;
	hardhalt();
}