/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

#include <u.h>
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"
#include "../port/edf.h"
#include "errstr.h"
#include <trace.h>

enum
{
	Scaling		= 2,
	AMPmincores	= 5,
};

Ref	noteidalloc;

static Ref	pidalloc;
static Sched	run;

struct Procalloc procalloc;

extern Proc*	psalloc(void);
extern void	pshash(Proc*);
extern void	psrelease(Proc*);
extern void	psunhash(Proc*);

static int	reprioritize(Proc*);
static void	updatecpu(Proc*);
static void	rebalance(void);

char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
	"Exotic",
	"Down",
};

Sched*
procsched(Proc *p)
{
	return &run;
}

/*
 * bad planning, once more.
 */
void
procinit0(void)
{
	run.schedgain = 30;
}

/*
 * Always splhi()'ed.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	m->inidle = 1;
	m->proc = nil;
	ainc(&run.nmach);

	setlabel(&m->sched);
	if(up) {
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		m->qstart = 0;
		m->qexpired = 0;
		coherence();
		m->proc = 0;
		switch(up->state) {
		case Running:
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			stopac();
			edfstop(up);
			if(up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	pga
			 */
			mmurelease(up);
			unlock(&pga);
			psrelease(up);
			unlock(&procalloc);
			break;
		}
		up->mach = nil;
		updatecpu(up);
		up = nil;
	}
	sched();
}

/*
 * Check if the stack has more than 4*KiB free.
 * Do not call panic, the stack is gigantic.
 */
static void
stackok(void)
{
	char dummy;

	if(&dummy < (char*)up->kstack + 4*KiB){
		print("tc kernel stack overflow, cpu%d stopped\n", machp()->machno);
		DONE();
	}
}

/*
 * If changing this routine, look also at sleep(). It
 * contains a copy of the guts of sched().
 */
void
sched(void)
{
	Proc *p;

	if(m->ilockdepth)
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
			machp()->machno,
			m->ilockdepth,
			up? up->lastilock: nil,
			(up && up->lastilock)? up->lastilock->_pc: 0,
			getcallerpc(&p+2));

	if(up){
		/*
		 * Delay the sched until the process gives up the locks
		 * it is holding. This avoids dumb lock loops.
		 * Don't delay if the process is Moribund.
		 * It called sched to die.
		 * But do sched eventually. This avoids a missing unlock
		 * from hanging the entire kernel.
		 * But don't reschedule procs holding palloc or procalloc.
		 * Those are far too important to be holding while asleep.
		 *
		 * This test is not exact. There can still be a few
		 * instructions in the middle of taslock when a process
		 * holds a lock but Lock.p has not yet been initialized.
		 */
		if(up->nlocks)
		if(up->state != Moribund)
		if(up->delaysched < 20
		|| pga.Lock.p == up
		|| procalloc.Lock.p == up){
			up->delaysched++;
			run.delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();
		/* statistics */
		if(up->nqtrap == 0 && up->nqsyscall == 0)
			up->nfullq++;
		m->cs++;

		stackok();

		procsave(up);
		mmuflushtlb(machp()->pml4->pa);
		if(setlabel(&up->sched)){
			procrestore(up);
			spllo();
			return;
		}
		gotolabel(&m->sched);
	}

	m->inidle = 1;
	p = runproc();	/* core 0 never returns */
	m->inidle = 0;

	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	up = p;
	m->qstart = machp()->ticks;
	up->nqtrap = 0;
	up->nqsyscall = 0;
	up->state = Running;
	up->mach = m;
	m->proc = up;
	mmuswitch(up);

	assert(!up->wired || up->wired == m);
	gotolabel(&up->sched);
}

int
anyready(void)
{
	return run.runvec;
}

int
anyhigher(void)
{
	return run.runvec & ~((1<<(up->priority+1))-1);
}
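
/*
 * runvec is a bitmap of non-empty run queues: queueproc() sets bit
 * pri when it queues a process at priority pri and dequeueproc()
 * clears it when that queue drains. For example, with up->priority
 * == 3, ~((1<<4)-1) == ~0xf masks off bits 0-3, so anyhigher() is
 * non-zero exactly when some queue above priority 3 holds a ready
 * process.
 */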

/*
 * here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	/* once a second, rebalance will reprioritize ready procs */
	if(machp()->machno == 0){
		rebalance();
		return;
	}

	/* with <= AMPmincores cores we use SMP and core 0 does not set qexpired for us */
	if(sys->nmach <= AMPmincores)
		if(machp()->ticks - m->qstart >= HZ/10)
			m->qexpired = 1;

	/* unless preempted, get to run */
	if(m->qexpired && anyready())
		up->delaysched++;
}
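
/*
 * Illustration (assuming the usual HZ == 100): HZ/10 == 10 ticks,
 * i.e. a 100ms quantum. Once a process has run that long with other
 * work ready, qexpired forces a sched() at the next opportunity.
 */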

/*
 * here at the end of non-clock interrupts to see if we should preempt the
 * current process. Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		/* Core 0 is dispatching all interrupts, so no core
		 * actually running a user process is ever going to call
		 * preempted, unless we consider IPIs for preemption or we
		 * distribute interrupts.
		 * But we are going to use SMP for machines with few cores.
		panic("preempted used");
		 */
		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}

/*
 * Update the cpu time average for this particular process,
 * which is about to change from up -> not up or vice versa.
 * p->lastupdate is the last time an updatecpu happened.
 *
 * The cpu time average is a decaying average that lasts
 * about D clock ticks. D is chosen to be approximately
 * the cpu time of a cpu-intensive "quick job". A job has to run
 * for approximately D clock ticks before we home in on its
 * actual cpu usage. Thus if you manage to get in and get out
 * quickly, you won't be penalized during your burst. Once you
 * start using your share of the cpu for more than about D
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 * below all the other quick jobs. Interactive tasks, because
 * they basically always use less than their fair share of cpu,
 * will be rewarded.
 *
 * If the process has not been running, then we want to
 * apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 * n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 * but D is big enough that this is approximately
 *
 *	cpu = cpu * (D-n)/D
 *
 * so we use that instead.
 *
 * If the process has been running, we apply the filter to
 * 1 - cpu, yielding a similar equation. Note that cpu is
 * stored in fixed point (* 1000).
 *
 * Updatecpu must be called before changing up, in order
 * to maintain accurate cpu usage statistics. It can be called
 * at any time to bring the stats for a given proc up-to-date.
 */
static void
updatecpu(Proc *p)
{
	int D, n, t, ocpu;

	if(p->edf)
		return;

	t = sys->ticks*Scaling + Scaling/2;
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	D = run.schedgain*HZ*Scaling;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		p->cpu = (ocpu*(D-n))/D;
	else{
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}
	//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid, p==up?"active":"inactive", n, ocpu, p->cpu);
}
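
/*
 * Worked example (assuming HZ == 100): D = schedgain*HZ*Scaling =
 * 30*100*2 = 6000. A process with p->cpu == 1000 that has been off
 * the cpu for n == 600 scaled ticks (300 real ticks, 3 seconds)
 * decays to 1000*(6000-600)/6000 == 900; a running process with
 * p->cpu == 0 moves toward 1000 by the mirrored filter on 1000 - cpu.
 */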

/*
 * On average, p has used p->cpu of a cpu recently.
 * Its fair share is nmach/m->load of a cpu. If it has been getting
 * too much, penalize it. If it has been getting not enough, reward it.
 * I don't think you can get much more than your fair share that
 * often, so most of the queues are for using less. Having a priority
 * of 3 means you're just right. Having a higher priority (up to p->basepri)
 * means you're not using as much as you could.
 */
static int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = sys->load;
	if(load == 0)
		return p->basepri;

	/*
	 * fairshare = 1.000 * sys->nmach * 1.000/load,
	 * except the decimal point is moved three places
	 * on both load and fairshare.
	 */
	fairshare = (sys->nmach*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");
	//iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}
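
/*
 * Illustration: with sys->nmach == 4 and sys->load == 2000 (a load
 * average of 2.0 in fixed point), fairshare = 4*1000*1000/2000 =
 * 2000. A process with p->cpu == 500 gets ratio = (2000+250)/500 = 4,
 * so it runs at priority 4 unless its basepri is lower.
 */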

/*
 * add a process to a scheduling queue
 */
static void
queueproc(Sched *sch, Schedq *rq, Proc *p, int locked)
{
	int pri;

	pri = rq - sch->runq;
	if(!locked)
		lock(sch);
	else if(canlock(sch))
		panic("queueproc: locked and can lock");
	p->priority = pri;
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	sch->nrdy++;
	sch->runvec |= 1<<pri;
	if(!locked)
		unlock(sch);
}

/*
 * try to remove a process from a scheduling queue (called splhi)
 */
Proc*
dequeueproc(Sched *sch, Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	if(!canlock(sch))
		return nil;

	/*
	 * the queue may have changed before we locked runq,
	 * refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 * p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(sch);
		return nil;
	}
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		sch->runvec &= ~(1<<(rq-sch->runq));
	rq->n--;
	sch->nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %d %s\n", p->text, p->pid, statename[p->state]);

	unlock(sch);
	return p;
}

static void
schedready(Sched *sch, Proc *p, int locked)
{
	Mpl pl;
	int pri;
	Schedq *rq;

	pl = splhi();
	if(edfready(p)){
		splx(pl);
		return;
	}
	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &sch->runq[pri];
	p->state = Ready;
	queueproc(sch, rq, p, locked);
	if(p->trace)
		proctrace(p, SReady, 0);
	splx(pl);
}

/*
 * ready(p) picks a new priority for a process and sticks it in the
 * runq for that priority.
 */
void
ready(Proc *p)
{
	schedready(procsched(p), p, 0);
}

/*
 * yield the processor and drop our priority
 */
void
yield(void)
{
	if(anyready()){
		/* pretend we just used 1/2 tick */
		up->lastupdate -= Scaling/2;
		sched();
	}
}

/*
 * recalculate priorities once a second. We need to do this
 * since priorities will otherwise only be recalculated when
 * the running process blocks.
 */
static void
rebalance(void)
{
	Mpl pl;
	int pri, npri, t;
	Schedq *rq;
	Proc *p;

	t = machp()->ticks;
	if(t - run.balancetime < HZ)
		return;
	run.balancetime = t;

	for(pri=0, rq=run.runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		if(p->mp != m)
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			pl = splhi();
			p = dequeueproc(&run, rq, p);
			if(p)
				queueproc(&run, &run.runq[npri], p, 0);
			splx(pl);
			goto another;
		}
	}
}

/*
 * Process p is ready to run, but there's no available core.
 * Try to make a core available by
 * 1. preempting a process with lower priority, or
 * 2. preempting one with the same priority that has run for more than HZ/10, or
 * 3. rescheduling one that has run for more than HZ, in the hope that
 *    its priority gets lowered.
 */
static void
preemptfor(Proc *p)
{
	Proc *up = externup();
	uint32_t delta;
	uint i, j, rr;
	Proc *mup;
	Mach *mp;

	assert(machp()->machno == 0);
	/*
	 * try to preempt a lower priority process first, default back to
	 * round robin otherwise.
	 */
	for(rr = 0; rr < 2; rr++)
		for(i = 0; i < MACHMAX; i++){
			j = pickcore(p->color, i);
			if((mp = sys->machptr[j]) != nil && mp->online && mp->nixtype == NIXTC){
				if(mp == m)
					continue;
				/*
				 * Caution here: mp->proc can change, even die.
				 */
				mup = mp->proc;
				if(mup == nil)		/* one got idle */
					return;
				delta = mp->ticks - mp->qstart;
				if(mup->priority < p->priority){
					mp->qexpired = 1;
					return;
				}
				if(rr && mup->priority == p->priority && delta > HZ/10){
					mp->qexpired = 1;
					return;
				}
				if(rr && delta > HZ){
					mp->qexpired = 1;
					return;
				}
			}
		}
}

/*
 * Scheduling thread run as the main loop of cpu 0
 * Used in AMP sched.
 */
static void
mach0sched(void)
{
	Schedq *rq;
	Proc *p;
	Mach *mp;
	uint32_t start, now;
	int n, i, j;

	assert(machp()->machno == 0);
	acmodeset(NIXKC);		/* we don't time share any more */
	n = 0;
	start = perfticks();
loop:
	/*
	 * find a ready process that we might run.
	 */
	spllo();
	for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--)
		for(p = rq->head; p; p = p->rnext){
			/*
			 * wired processes may only run when their core is available.
			 */
			if(p->wired != nil){
				if(p->wired->proc == nil)
					goto found;
				continue;
			}
			/*
			 * find a ready process that did run at an available core
			 * or one that has not moved for some time.
			 */
			if(p->mp == nil || p->mp->proc == nil || n > 0)
				goto found;
		}
	/* waste time or halt the CPU */
	idlehands();
	/* remember how much time we're here */
	now = perfticks();
	m->perf.inidle += now-start;
	start = now;
	n++;
	goto loop;

found:
	assert(machp()->machno == 0);
	splhi();
	/*
	 * find a core for this process, but honor wiring.
	 */
	mp = p->wired;
	if(mp != nil){
		if(mp->proc != nil)
			goto loop;
	}else{
		for(i = 0; i < MACHMAX; i++){
			j = pickcore(p->color, i);
			if((mp = sys->machptr[j]) != nil && mp->online && mp->nixtype == NIXTC)
				if(mp != m && mp->proc == nil)
					break;
		}
		if(i == MACHMAX){
			preemptfor(p);
			goto loop;
		}
	}

	p = dequeueproc(&run, rq, p);
	mp->proc = p;
	if(p != nil){
		p->state = Scheding;
		p->mp = mp;
	}

	n = 0;
	goto loop;
}

/*
 * SMP performs better than AMP with few cores.
 * So, leave this here for now. We should probably
 * write a unified version of runproc good enough for
 * both SMP and AMP.
 */
static Proc*
smprunproc(void)
{
	Schedq *rq;
	Proc *p;
	uint32_t start, now;
	int i;

	start = perfticks();
	run.preempts++;

loop:
	/*
	 * find a process that last ran on this processor (affinity),
	 * or one that hasn't moved in a while (load balancing). Every
	 * time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 * find the highest priority target process that this
		 * processor can run given affinity constraints.
		 */
		for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				if(p->mp == nil || p->mp == sys->machptr[machp()->machno]
				|| (!p->wired && i > 0))
					goto found;
			}
		}
		/* waste time or halt the CPU */
		idlehands();
		/* remember how much time we're here */
		now = perfticks();
		m->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	p = dequeueproc(&run, rq, p);
	if(p == nil)
		goto loop;

	p->state = Scheding;
	p->mp = sys->machptr[machp()->machno];

	if(edflock(p)){
		edfrun(p, rq == &run.runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	if(p->trace)
		proctrace(p, SRun, 0);
	return p;
}

/*
 * pick a process to run.
 * most of this is used in AMP sched.
 * (on a quad core or less, we use SMP).
 * In the case of core 0 we always return nil, but
 * schedule the picked process at any other available TC.
 * In the case of other cores we wait until a process is given
 * by core 0.
 */
Proc*
runproc(void)
{
	Schedq *rq;
	Proc *p;
	uint32_t start, now;

	if(sys->nmach <= AMPmincores)
		return smprunproc();

	start = perfticks();
	run.preempts++;
	rq = nil;
	if(machp()->machno != 0){
		do{
			spllo();
			while(m->proc == nil)
				idlehands();
			now = perfticks();
			m->perf.inidle += now-start;
			start = now;
			splhi();
			p = m->proc;
		}while(p == nil);
		p->state = Scheding;
		p->mp = sys->machptr[machp()->machno];

		if(edflock(p)){
			edfrun(p, rq == &run.runq[PriEdf]);	/* start deadline timer and do admin */
			edfunlock();
		}
		if(p->trace)
			proctrace(p, SRun, 0);
		return p;
	}

	mach0sched();
	return nil;	/* not reached */
}

int
canpage(Proc *p)
{
	int ok;
	Sched *sch;

	splhi();
	sch = procsched(p);
	lock(sch);
	/* Only reliable way to see if we are Running */
	if(p->mach == 0) {
		p->newtlb = 1;
		ok = 1;
	}
	else
		ok = 0;
	unlock(sch);
	spllo();

	return ok;
}

Proc*
newproc(void)
{
	Proc *p;

	p = psalloc();

	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->kp = 0;
	if(up != nil && up->procctl == Proc_tracesyscall)
		p->procctl = Proc_tracesyscall;
	else
		p->procctl = 0;
	p->syscalltrace = nil;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pshash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid <= 0 || p->noteid <= 0)
		panic("pidalloc");
	if(p->kstack == 0)
		p->kstack = smalloc(KSTACK);

	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = sys->ticks*Scaling;
	p->edf = nil;

	p->ntrap = 0;
	p->nintr = 0;
	p->nsyscall = 0;
	p->nactrap = 0;
	p->nacsyscall = 0;
	p->nicc = 0;
	p->actime = 0ULL;
	p->tctime = 0ULL;
	p->ac = nil;
	p->nfullq = 0;
	memset(&p->PMMU, 0, sizeof p->PMMU);

	return p;
}

/*
 * wire this proc to a machine
 */
void
procwired(Proc *p, int bm)
{
	Proc *up = externup();
	Proc *pp;
	int i;
	char nwired[MACHMAX];
	Mach *wm;

	if(bm < 0){
		/* pick a machine to wire to */
		memset(nwired, 0, sizeof(nwired));
		p->wired = 0;
		for(i=0; (pp = psincref(i)) != nil; i++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
			psdecref(pp);
		}
		bm = 0;
		for(i=0; i<sys->nmach; i++)
			if(nwired[i] < nwired[bm])
				bm = i;
	} else {
		/* use the virtual machine requested */
		bm = bm % sys->nmach;
	}

	p->wired = sys->machptr[bm];
	p->mp = p->wired;

	/*
	 * adjust our color to the new domain.
	 */
	if(up == nil || p != up)
		return;
	up->color = corecolor(up->mp->machno);
	qlock(&up->seglock);
	for(i = 0; i < NSEG; i++)
		if(up->seg[i])
			up->seg[i]->color = up->color;
	qunlock(&up->seglock);
}

void
procpriority(Proc *p, int pri, int fixed)
{
	if(pri >= Npriq)
		pri = Npriq - 1;
	else if(pri < 0)
		pri = 0;
	p->basepri = pri;
	p->priority = pri;
	if(fixed){
		p->fixedpri = 1;
	} else {
		p->fixedpri = 0;
	}
}

/*
 * sleep if a condition is not true. Another process will
 * awaken us after it sets the condition. When we awaken
 * the condition may no longer be true.
 *
 * we lock both the process and the rendezvous to keep r->p
 * and p->r synchronized.
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	Mpl pl;

	pl = splhi();

	if(up->nlocks)
		print("process %d sleeps with %d locks held, last lock %#p locked at pc %#p, sleep called from %#p\n",
			up->pid, up->nlocks, up->lastlock, up->lastlock->_pc, getcallerpc(&r));
	lock(r);
	lock(&up->rlock);
	if(r->p){
		print("double sleep called from %#p, %d %d\n",
			getcallerpc(&r), r->p->pid, up->pid);
		dumpstack();
	}

	/*
	 * Wakeup only knows there may be something to do by testing
	 * r->p in order to get something to lock on.
	 * Flush that information out to memory in case the sleep is
	 * committed.
	 */
	r->p = up;

	if((*f)(arg) || up->notepending){
		/*
		 * if condition happened or a note is pending
		 * never mind
		 */
		r->p = nil;
		unlock(&up->rlock);
		unlock(r);
	} else {
		/*
		 * now we are committed to
		 * change state and call scheduler
		 */
		if(up->trace)
			proctrace(up, SSleep, 0);
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		m->cs++;

		procsave(up);
		mmuflushtlb(machp()->pml4->pa);
		if(setlabel(&up->sched)) {
			/*
			 * here when the process is awakened
			 */
			procrestore(up);
			spllo();
		} else {
			/*
			 * here to go to sleep (i.e. stop Running)
			 */
			unlock(&up->rlock);
			unlock(r);
			gotolabel(&m->sched);
		}
	}

	if(up->notepending) {
		up->notepending = 0;
		splx(pl);
		if(up->procctl == Proc_exitme && up->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}

	splx(pl);
}
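
/*
 * Typical use (a sketch; Ctlr, done and ctlr->r are hypothetical):
 * a driver sleeps on a Rendez until its interrupt handler signals
 * completion.
 *
 *	static int
 *	done(void *a)
 *	{
 *		return ((Ctlr*)a)->done;
 *	}
 *	...
 *	while(!ctlr->done)
 *		sleep(&ctlr->r, done, ctlr);
 *
 * and in the interrupt handler:
 *
 *	ctlr->done = 1;
 *	wakeup(&ctlr->r);
 *
 * The condition function is re-evaluated with r and up->rlock held,
 * so a wakeup racing with the call to sleep is not lost.
 */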

static int
tfn(void *arg)
{
	return up->trend == nil || up->tfn(arg);
}

void
twakeup(Ureg* ureg, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}

void
tsleep(Rendez *r, int (*fn)(void*), void *arg, int32_t ms)
{
	if(up->tt){
		print("tsleep: timer active: mode %d, tf %#p\n",
			up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = MS2NS(ms);
	up->tf = twakeup;
	up->tmode = Trelative;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if(up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}
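
/*
 * tsleep is sleep with a timeout: tfn wakes us when either the
 * caller's condition holds or twakeup has cleared up->trend because
 * ms milliseconds passed. The common Plan 9 idiom for a plain,
 * interruptible delay uses return0 (the usual always-false condition
 * helper):
 *
 *	tsleep(&up->sleep, return0, 0, ms);
 */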

/*
 * Expects that only one process can call wakeup for any given Rendez.
 * We hold both locks to ensure that r->p and p->r remain consistent.
 * Richard Miller has a better solution that doesn't require both to
 * be held simultaneously, but I'm a paranoid - presotto.
 */
Proc*
wakeup(Rendez *r)
{
	Mpl pl;
	Proc *p;

	pl = splhi();

	lock(r);
	p = r->p;

	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r)
			panic("wakeup: state");
		r->p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(r);

	splx(pl);

	return p;
}

/*
 * if waking a sleeping process, this routine must hold both
 * p->rlock and r->lock. However, it can't know them in
 * the same order as wakeup causing a possible lock ordering
 * deadlock. We break the deadlock by giving up the p->rlock
 * lock if we can't get the r->lock and retrying.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	Mpl pl;
	int ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);

	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;

	/* NIX */
	if(p->state == Exotic){
		/* it could be that the process is not running
		 * in the AC when we interrupt the AC, but then
		 * we'd only get an extra interrupt in the AC, and
		 * nothing should happen.
		 */
		intrac(p);
	}

	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		pl = splhi();
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->p != p)
				panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
			p->r = nil;
			r->p = nil;
			ready(p);
			unlock(r);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(pl);
		sched();
	}
	unlock(&p->rlock);
	splx(pl);

	if(p->state != Rendezvous){
		if(p->state == Semdown)
			ready(p);
		return ret;
	}

	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);
	return ret;
}

/*
 * weird thing: keep at most NBROKEN around
 */
#define	NBROKEN 4
struct
{
	QLock;
	int	n;
	Proc	*p[NBROKEN];
}broken;

void
addbroken(Proc *p)
{
	qlock(&broken);
	if(broken.n == NBROKEN) {
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken);

	stopac();
	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}

void
unbreak(Proc *p)
{
	int b;

	qlock(&broken);
	for(b=0; b < broken.n; b++)
		if(broken.p[b] == p) {
			broken.n--;
			memmove(&broken.p[b], &broken.p[b+1],
					sizeof(Proc*)*(NBROKEN-(b+1)));
			ready(p);
			break;
		}
	qunlock(&broken);
}

int
freebroken(void)
{
	int i, n;

	qlock(&broken);
	n = broken.n;
	for(i=0; i<n; i++) {
		ready(broken.p[i]);
		broken.p[i] = 0;
	}
	broken.n = 0;
	qunlock(&broken);
	return n;
}

void
pexit(char *exitstr, int freemem)
{
	Proc *p;
	Segment **s, **es;
	int32_t utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;

	if(0 && up->nfullq > 0)
		iprint(" %s=%d", up->text, up->nfullq);
	if(0 && up->nicc > 0)
		iprint(" [%s nicc %ud tctime %ulld actime %ulld]\n",
			up->text, up->nicc, up->tctime, up->actime);
	if(up->syscalltrace != nil)
		free(up->syscalltrace);
	up->syscalltrace = nil;
	up->alarm = 0;

	if(up->tt)
		timerdel(up);
	if(up->trace)
		proctrace(up, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 * if not a kernel process and have a parent,
	 * do some housekeeping.
	 */
	if(up->kp == 0) {
		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(sys->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %d: %s",
				up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 * Check that parent is still alive.
		 */
		if(p->pid == up->parentpid && p->state != Broken) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;
			/*
			 * If there would be more than 128 wait records
			 * outstanding for my parent, then don't leave a wait
			 * record behind. This helps prevent badly written
			 * daemon processes from accumulating lots of wait
			 * records.
			 */
			if(p->nwait < 128) {
				wq->next = p->waitq;
				p->waitq = wq;
				p->nwait++;
				wq = nil;
				wakeup(&p->waitr);
			}
		}
		unlock(&p->exl);
		if(wq)
			free(wq);
	}

	if(!freemem)
		addbroken(up);

	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	psunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&pga);

	stopac();
	edfstop(up);
	up->state = Moribund;
	sched();
	panic("pexit");
}

int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}

int
pwait(Waitmsg *w)
{
	int cpid;
	Waitq *wq;

	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);

	return cpid;
}

void
dumpaproc(Proc *p)
{
	uintptr_t bss;
	char *s;

	if(p == 0)
		return;

	bss = 0;
	if(p->seg[HSEG])
		bss = p->seg[HSEG]->top;
	else if(p->seg[BSEG])
		bss = p->seg[BSEG]->top;

	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3d:%10s pc %#p dbgpc %#p %8s (%s) ut %ld st %ld bss %#p qpc %#p nl %d nd %lud lpc %#p pri %lud\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks,
		p->delaysched, p->lastlock ? p->lastlock->_pc : 0, p->priority);
}

void
procdump(void)
{
	int i;
	Proc *p;

	if(up)
		print("up %d\n", up->pid);
	else
		print("no current process\n");

	for(i=0; (p = psincref(i)) != nil; i++) {
		if(p->state != Dead)
			dumpaproc(p);
		psdecref(p);
	}
}

/*
 * wait till all processes have flushed their mmu
 * state about segment s
 */
void
procflushseg(Segment *s)
{
	Proc *up = externup();
	int i, ns, nm, nwait;
	Proc *p;
	Mach *mp;

	/*
	 * tell all processes with this
	 * segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; (p = psincref(i)) != nil; i++) {
		if(p->state == Dead){
			psdecref(p);
			continue;
		}
		for(ns = 0; ns < NSEG; ns++){
			if(p->seg[ns] == s){
				p->newtlb = 1;
				for(nm = 0; nm < MACHMAX; nm++)
					if((mp = sys->machptr[nm]) != nil && mp->online)
						if(mp->proc == p){
							mp->mmuflush = 1;
							nwait++;
						}
				break;
			}
		}
		psdecref(p);
	}

	if(nwait == 0)
		return;

	/*
	 * wait for all processors to take a clock interrupt
	 * and flush their mmu's.
	 * NIX BUG: this won't work if another core is in AC mode.
	 * In that case we must IPI it, but only if that core is
	 * using this segment.
	 */
	for(i = 0; i < MACHMAX; i++)
		if((mp = sys->machptr[i]) != nil && mp->online)
			if(mp != m)
				while(mp->mmuflush)
					sched();
}

void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){
		if(rq->head == 0)
			continue;
		print("run[%ld]:", rq-run.runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %d(%lud)", p->pid, machp()->ticks - p->readytime);
		print("\n");
		delay(150);
	}
	print("nrdy %d\n", run.nrdy);
}

void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;
	p->noswap = 1;

	p->scallnr = up->scallnr;
	memmove(p->arg, up->arg, sizeof(up->arg));
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	if(p->dot)
		incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = 0;
	p->dbgreg = 0;

	procpriority(p, PriKproc, 0);

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = sys->ticks;
	ready(p);

	/*
	 * since the bss/data segments are now shareable,
	 * any mmu info about this process is now stale
	 * and has to be discarded.
	 */
	p->newtlb = 1;
	mmuflush();
}
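
/*
 * Usage sketch (the name and argument are hypothetical): spawn a
 * kernel process that services a device queue; the child starts in
 * func(arg) with the priority and groups set above.
 *
 *	kproc("mydriverio", mydriverio, ctlr);
 */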

/*
 * called splhi() by notify(). See comment in notify for the
 * reasoning.
 */
void
procctl(Proc *p)
{
	Mpl pl;
	char *state;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		pl = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();
		p->state = Stopped;
		sched();
		p->psstate = state;
		splx(pl);
		return;

	case Proc_toac:
		p->procctl = 0;
		/*
		 * This pretends to return from the system call,
		 * by moving to a core, but never returns (unless
		 * the process gets moved back to a TC.)
		 */
		spllo();
		runacore();
		return;

	case Proc_totc:
		p->procctl = 0;
		if(p != up)
			panic("procctl: stopac: p != up");
		spllo();
		stopac();
		return;
	}
}

void
error(char *err)
{
	spllo();

	assert(up->nerrlab < NERR);
	kstrcpy(up->errstr, err, ERRMAX);
	setlabel(&up->errlab[NERR-1]);
	nexterror();
}

void
nexterror(void)
{
	gotolabel(&up->errlab[--up->nerrlab]);
}

void
exhausted(char *resource)
{
	char buf[ERRMAX];

	sprint(buf, "no free %s", resource);
	iprint("%s\n", buf);
	error(buf);
}

void
killbig(char *why)
{
	int i, x;
	Segment *s;
	uint32_t l, max;
	Proc *p, *kp;

	max = 0;
	kp = nil;
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			if(kp != nil)
				psdecref(kp);
			kp = p;
			max = l;
		}
		else
			psdecref(p);
	}
	if(kp == nil)
		return;

	print("%d: %s killed: %s\n", kp->pid, kp->text, why);
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
			p->procctl = Proc_exitbig;
		psdecref(p);
	}

	kp->procctl = Proc_exitbig;
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		if(s != 0 && canqlock(&s->lk)) {
			mfreeseg(s, s->base, (s->top - s->base)/BIGPGSZ);
			qunlock(&s->lk);
		}
	}
	psdecref(kp);
}

/*
 * change ownership to 'new' of all processes owned by 'old'. Used when
 * eve changes.
 */
void
renameuser(char *old, char *new)
{
	int i;
	Proc *p;

	for(i = 0; (p = psincref(i)) != nil; i++){
		if(p->user != nil && strcmp(old, p->user) == 0)
			kstrdup(&p->user, new);
		psdecref(p);
	}
}

/*
 * time accounting called by clock() splhi'd.
 * only cpu1 computes the system load average,
 * but the system load average is accounted to cpu0.
 */
void
accounttime(void)
{
	Proc *p;
	uint32_t n, per;

	p = m->proc;
	if(p) {
		if(machp()->machno == 1)
			run.nrun++;
		p->time[p->insyscall]++;
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - m->perf.last;
	m->perf.last = n;
	per = (m->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		m->perf.period = per;

	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
	m->perf.inidle = 0;

	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
	m->perf.inintr = 0;

	/*
	 * only one processor gets to compute system load averages;
	 * it has to be mach 1 when we use AMP.
	 */
	if(sys->nmach > 1 && machp()->machno != 1)
		return;

	/*
	 * calculate decaying load average.
	 * if we decay by (n-1)/n then it takes
	 * n clock ticks to go from load L to .36 L once
	 * things quiet down. it takes about 5 n clock
	 * ticks to go to zero. so using HZ means this is
	 * approximately the load over the last second,
	 * with a tail lasting about 5 seconds.
	 */
	n = run.nrun;
	run.nrun = 0;
	n = (run.nrdy+n)*1000;
	sys->load = (sys->load*(HZ-1)+n)/HZ;
}
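
/*
 * Example of the decay (assuming HZ == 100): if run.nrdy + run.nrun
 * is 3 at every tick, n == 3000 and the filter
 * load = (load*99 + 3000)/100 moves 1/100 of the remaining distance
 * each tick, converging on sys->load == 3000; reprioritize() then
 * reads that fixed-point value as a load average of 3.0.
 */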

void
halt(void)
{
	if(run.nrdy != 0)
		return;
	hardhalt();
}