proc.c

/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

#include <u.h>
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"
#include "../port/edf.h"
#include "errstr.h"
#include <trace.h>

extern int nosmp;

enum
{
	Scaling = 2,
	AMPmincores = 5,
};

Ref noteidalloc;

static Ref pidalloc;
static Sched run;

struct Procalloc procalloc;

extern Proc* psalloc(void);
extern void pshash(Proc*);
extern void psrelease(Proc*);
extern void psunhash(Proc*);

static int reprioritize(Proc*);
static void updatecpu(Proc*);
static void rebalance(void);

char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
	"Exotic",
	"Down",
};

#if 0
void
debuggotolabel(Label *p)
{
	Proc *up = externup();

	if(0)hi("debuggotolabel");
	iprint("gotolabel: pid %p rip %p sp %p\n",
		m && up? up->pid : 0,
		(void *)p->pc,
		(void *)p->sp);
	/*
	 */
	if (!p->pc)
		die("PC IS ZERO!");
	/* this is an example of putting a breakpoint
	 * here so we can capture a particular process.
	 * startup is very deterministic so this can
	 * be very useful. You can then attach with
	 * gdb and single step. In practice this helped us show
	 * that our return stack for sysrforkret was bogus.
	if (m && up && up->pid == 6)
		die("PID 6\n");
	 */
	gotolabel(p);
}
#endif

Sched*
procsched(Proc *p)
{
	return &run;
}

/*
 * bad planning, once more.
 */
void
procinit0(void)
{
	run.schedgain = 30;
}

/*
 * Always splhi()'ed.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	machp()->inidle = 1;
	machp()->proc = nil;
	ainc(&run.nmach);

	setlabel(&machp()->sched);
	Proc *up = externup();
	if(infected_with_std()){
		print("mach %d got an std from %s (pid %d)!\n",
			machp()->machno,
			up ? up->text : "*notext",
			up ? up->pid : -1
		);
		disinfect_std();
	}

	if(up) {
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		machp()->qstart = 0;
		machp()->qexpired = 0;
		coherence();
		machp()->proc = 0;
		switch(up->state) {
		case Running:
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			stopac();
			edfstop(up);
			if (up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	pga
			 */
			mmurelease(up);
			unlock(&pga);
			psrelease(up);
			unlock(&procalloc);
			break;
		}
		up->mach = nil;
		updatecpu(up);
		machp()->externup = nil;
	}
	sched();
}

/*
 * If changing this routine, look also at sleep(). It
 * contains a copy of the guts of sched().
 */
void
sched(void)
{
	Proc *up = externup();
	Proc *p;

	if(!islo() && machp()->ilockdepth)
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
			machp()->machno,
			machp()->ilockdepth,
			up? up->lastilock: nil,
			(up && up->lastilock)? up->lastilock->_pc: 0,
			getcallerpc(&p+2));

	kstackok();
	if(up){
		/*
		 * Delay the sched until the process gives up the locks
		 * it is holding. This avoids dumb lock loops.
		 * Don't delay if the process is Moribund.
		 * It called sched to die.
		 * But do sched eventually. This avoids a missing unlock
		 * from hanging the entire kernel.
		 * But don't reschedule procs holding palloc or procalloc.
		 * Those are far too important to be holding while asleep.
		 *
		 * This test is not exact. There can still be a few
		 * instructions in the middle of taslock when a process
		 * holds a lock but Lock.p has not yet been initialized.
		 */
		if(up->nlocks)
		if(up->state != Moribund)
		if(up->delaysched < 20
		|| pga.Lock.p == up
		|| procalloc.Lock.p == up){
			up->delaysched++;
			run.delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();
		/* statistics */
		if(up->nqtrap == 0 && up->nqsyscall == 0)
			up->nfullq++;
		machp()->cs++;

		procsave(up);
		mmuflushtlb(machp()->pml4->pa);
		if(setlabel(&up->sched)){
			procrestore(up);
			spllo();
			return;
		}
		/*debug*/gotolabel(&machp()->sched);
	}

	machp()->inidle = 1;
	p = runproc();	/* core 0 never returns */
	machp()->inidle = 0;

	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	if(nosmp){
		if(p != machp()->readied)
			machp()->schedticks = machp()->ticks + HZ/10;
		machp()->readied = 0;
	}
	machp()->externup = p;
	up = p;
	machp()->qstart = machp()->ticks;
	up->nqtrap = 0;
	up->nqsyscall = 0;
	up->state = Running;
	//up->mach = m;
	up->mach = sys->machptr[machp()->machno];
	machp()->proc = up;
//	iprint("up->sched.sp %p * %p\n", up->sched.sp,
//		*(void **) up->sched.sp);
	mmuswitch(up);

	assert(!up->wired || up->wired == machp());
	if (0) hi("gotolabel\n");
	/*debug*/gotolabel(&up->sched);
}

int
anyready(void)
{
	return run.runvec;
}

int
anyhigher(void)
{
	Proc *up = externup();
	return run.runvec & ~((1<<(up->priority+1))-1);
}

/*
 * here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	Proc *up = externup();

	/* once a second, rebalance will reprioritize ready procs */
	if(machp()->machno == 0)
		rebalance();

	/* with <= 4 cores, we use SMP and core 0 does not set qexpired for us */
	//if(sys->nmach <= AMPmincores)
	if(machp()->ticks - machp()->qstart >= HZ/100)
		machp()->qexpired = 1;

	/* unless preempted, get to run */
	if(machp()->qexpired && anyready())
		up->delaysched++;
}

#if 0
void
hzsched(void)
{
	Proc *up = externup();

	/* once a second, rebalance will reprioritize ready procs */
	if(machp()->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	if(anyhigher()
	|| (!up->fixedpri && machp()->ticks > m->schedticks && anyready())){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->delaysched++;
	}
}
#endif

/*
 * here at the end of non-clock interrupts to see if we should preempt the
 * current process. Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	Proc *up = externup();

	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		/* Core 0 is dispatching all interrupts, so no core
		 * actually running a user process is ever going to call
		 * preempted, unless we consider IPIs for preemption or we
		 * distribute interrupts. But we are going to use SMP for
		 * machines with few cores.
		panic("preempted used");
		 */

		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}

#if 0
int
preempted(void)
{
	Proc *up = externup();

	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}
#endif

/*
 * Update the cpu time average for this particular process,
 * which is about to change from up -> not up or vice versa.
 * p->lastupdate is the last time an updatecpu happened.
 *
 * The cpu time average is a decaying average that lasts
 * about D clock ticks. D is chosen to be approximately
 * the cpu time of a cpu-intensive "quick job". A job has to run
 * for approximately D clock ticks before we home in on its
 * actual cpu usage. Thus if you manage to get in and get out
 * quickly, you won't be penalized during your burst. Once you
 * start using your share of the cpu for more than about D
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 * below all the other quick jobs. Interactive tasks, because
 * they basically always use less than their fair share of cpu,
 * will be rewarded.
 *
 * If the process has not been running, then we want to
 * apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 * n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 * but D is big enough that this is approximately
 *
 *	cpu = cpu * (D-n)/D
 *
 * so we use that instead.
 *
 * If the process has been running, we apply the filter to
 * 1 - cpu, yielding a similar equation. Note that cpu is
 * stored in fixed point (* 1000).
 *
 * Updatecpu must be called before changing up, in order
 * to maintain accurate cpu usage statistics. It can be called
 * at any time to bring the stats for a given proc up-to-date.
 */
static void
updatecpu(Proc *p)
{
	Proc *up = externup();
	int D, n, t, ocpu;

	if(p->edf)
		return;

	//t = sys->ticks*Scaling + Scaling/2;
	t = sys->machptr[0]->ticks*Scaling + Scaling/2;	//Originally MACHP(0)
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	D = run.schedgain*HZ*Scaling;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		p->cpu = (ocpu*(D-n))/D;
	else{
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}
	//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid, p==up?"active":"inactive", n, ocpu, p->cpu);
}

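/*
 * Worked example with assumed numbers (illustrative only, not values
 * taken from this kernel's configuration): with HZ = 100, Scaling = 2
 * and schedgain = 30, D = 30*100*2 = 6000. A process that has been off
 * the cpu for n = 600 scaled ticks with p->cpu = 500 decays to
 * 500*(6000-600)/6000 = 450, while one that stayed on the cpu for the
 * same span moves toward 1000: 1000 - (1000-500)*(6000-600)/6000 = 550.
 */
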
/*
 * On average, p has used p->cpu of a cpu recently.
 * Its fair share is nmach/m->load of a cpu. If it has been getting
 * too much, penalize it. If it has been getting not enough, reward it.
 * I don't think you can get much more than your fair share that
 * often, so most of the queues are for using less. Having a priority
 * of 3 means you're just right. Having a higher priority (up to p->basepri)
 * means you're not using as much as you could.
 */
static int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = sys->load;
	if(load == 0)
		return p->basepri;

	/*
	 * fairshare = 1.000 * conf.nproc * 1.000/load,
	 * except the decimal point is moved three places
	 * on both load and fairshare.
	 */
	fairshare = (sys->nmach*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");
	//iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}

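/*
 * Worked example with assumed numbers (illustrative only): with
 * sys->nmach = 4 and load = 2000 (i.e. 2.0 after the decimal shift),
 * fairshare = 4*1000*1000/2000 = 2000. A process with p->cpu = 500
 * then gets ratio = (2000 + 250)/500 = 4, clamped down to p->basepri
 * if that is smaller.
 */
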
/*
 * add a process to a scheduling queue
 */
static void
queueproc(Sched *sch, Schedq *rq, Proc *p, int locked)
{
	int pri;

	pri = rq - sch->runq;
	if(!locked)
		lock(sch);
	else if(canlock(sch))
		panic("queueproc: locked and can lock");
	p->priority = pri;
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	sch->nrdy++;
	sch->runvec |= 1<<pri;
	if(!locked)
		unlock(sch);
}

/*
 * try to remove a process from a scheduling queue (called splhi)
 */
Proc*
dequeueproc(Sched *sch, Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	if(!canlock(sch))
		return nil;

	/*
	 * the queue may have changed before we locked runq,
	 * refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 * p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(sch);
		return nil;
	}
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		sch->runvec &= ~(1<<(rq-sch->runq));
	rq->n--;
	sch->nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %d %s\n", p->text, p->pid, statename[p->state]);

	unlock(sch);
	return p;
}

static void
schedready(Sched *sch, Proc *p, int locked)
{
	Mpl pl;
	int pri;
	Schedq *rq;

	pl = splhi();
	if(edfready(p)){
		splx(pl);
		return;
	}

	/* if(up != p)
		m->readied = p; */	/* group scheduling, will be removed */

	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &sch->runq[pri];
	p->state = Ready;
	queueproc(sch, rq, p, locked);
	if(p->trace)
		proctrace(p, SReady, 0);
	splx(pl);
}

/*
 * ready(p) picks a new priority for a process and sticks it in the
 * runq for that priority.
 */
void
ready(Proc *p)
{
	schedready(procsched(p), p, 0);
}

/*
 * yield the processor and drop our priority
 */
void
yield(void)
{
	Proc *up = externup();

	if(anyready()){
		/* pretend we just used 1/2 tick */
		up->lastupdate -= Scaling/2;
		sched();
	}
}

/*
 * recalculate priorities once a second. We need to do this
 * since priorities will otherwise only be recalculated when
 * the running process blocks.
 */
static void
rebalance(void)
{
	Mpl pl;
	int pri, npri, t;
	Schedq *rq;
	Proc *p;

	t = machp()->ticks;
	if(t - run.balancetime < HZ)
		return;
	run.balancetime = t;

	for(pri=0, rq=run.runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		if(p->mp != sys->machptr[machp()->machno])	//MACHP(machp()->machno)
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			pl = splhi();
			p = dequeueproc(&run, rq, p);
			if(p)
				queueproc(&run, &run.runq[npri], p, 0);
			splx(pl);
			goto another;
		}
	}
}

/*
 * Process p is ready to run, but there's no available core.
 * Try to make a core available by
 * 1. preempting a process with lower priority, or
 * 2. preempting one with the same priority that had more than HZ/10, or
 * 3. rescheduling one that ran more than HZ, in the hope its priority gets lowered.
 */
static void
preemptfor(Proc *p)
{
	Proc *up = externup();
	uint32_t delta;
	uint i, /*j,*/ rr;
	Proc *mup;
	Mach *mp;

	assert(machp()->machno == 0);
	/*
	 * try to preempt a lower priority process first, default back to
	 * round robin otherwise.
	 */
	for(rr = 0; rr < 2; rr++)
		for(i = 0; i < MACHMAX; i++){
			/*j = pickcore(p->color, i);
			if((mp = sys->machptr[j]) != nil && mp->online && mp->nixtype == NIXTC){*/
			if((mp = sys->machptr[i]) != nil && mp->online && mp->nixtype == NIXTC){
				if(mp == machp())
					continue;

				/*
				 * Caution here: mp->proc can change, even die.
				 */
				mup = mp->proc;
				if(mup == nil)		/* one got idle */
					return;
				delta = mp->ticks - mp->qstart;
				if(up->priority < p->priority){
					mp->qexpired = 1;
					return;
				}
				if(rr && up->priority == p->priority && delta > HZ/10){
					mp->qexpired = 1;
					return;
				}
				if(rr && delta > HZ){
					mp->qexpired = 1;
					return;
				}
			}
		}
}

/*
 * Scheduling thread that runs as the main loop of cpu 0.
 * Used in AMP sched.
 */
static void
mach0sched(void)
{
	Schedq *rq;
	Proc *p;
	Mach *mp;
	uint32_t start, now;
	int n, i;	//, j;

	assert(machp()->machno == 0);
	acmodeset(NIXKC);	/* we don't time share any more */
	n = 0;
	start = perfticks();
loop:
	/*
	 * find a ready process that we might run.
	 */
	spllo();
	for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--)
		for(p = rq->head; p; p = p->rnext){
			/*
			 * wired processes may only run when their core is available.
			 */
			if(p->wired != nil){
				if(p->wired->proc == nil)
					goto found;
				continue;
			}
			/*
			 * find a ready process that did run at an available core
			 * or one that has not moved for some time.
			 */
			if(p->mp == nil || p->mp->proc == nil || n > 0){
				goto found;
			}
		}
	/* waste time or halt the CPU */
	idlehands();
	/* remember how much time we're here */
	now = perfticks();
	machp()->perf.inidle += now-start;
	start = now;
	n++;
	goto loop;

found:
	assert(machp()->machno == 0);
	splhi();
	/*
	 * find a core for this process, but honor wiring.
	 */
	mp = p->wired;
	if(mp != nil){
		if(mp->proc != nil)
			goto loop;
	}else{
		for(i = 0; i < MACHMAX; i++){
			/*j = pickcore(p->color, i);
			if((mp = sys->machptr[j]) != nil && mp->online && mp->nixtype == NIXTC){*/
			if((mp = sys->machptr[i]) != nil){	// && mp->online && mp->nixtype == NIXTC){
				if(mp != machp() && mp->proc == nil)
					break;
			}
		}
		if(i == MACHMAX){
			preemptfor(p);
			goto loop;
		}
	}

	p = dequeueproc(&run, rq, p);
	mp->proc = p;
	if(p != nil){
		p->state = Scheding;
		p->mp = mp;
	}

	n = 0;
	goto loop;
}

/*
 * SMP performs better than AMP with few cores.
 * So, leave this here for now. We should probably
 * write a unified version of runproc good enough for
 * both SMP and AMP.
 */
static Proc*
smprunproc(void)
{
	Schedq *rq;
	Proc *p;
	uint32_t start, now;
	int i;

	start = perfticks();
	run.preempts++;

loop:
	/*
	 * find a process that last ran on this processor (affinity),
	 * or one that hasn't moved in a while (load balancing). Every
	 * time around the loop affinity goes down.
	 */
	splhi();
	for(i = 0;; i++){
		/*
		 * find the highest priority target process that this
		 * processor can run given affinity constraints.
		 */
		for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				if(p->mp == nil || p->mp == sys->machptr[machp()->machno]
				|| (!p->wired && i > 0))
					goto found;
			}
		}

		/* waste time or halt the CPU */
		idlehands();
		splhi();
		/* remember how much time we're here */
		now = perfticks();
		machp()->perf.inidle += now-start;
		start = now;
	}

found:
	p = dequeueproc(&run, rq, p);
	if(p == nil)
		goto loop;

	p->state = Scheding;
	p->mp = sys->machptr[machp()->machno];

	if(edflock(p)){
		edfrun(p, rq == &run.runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	if(p->trace)
		proctrace(p, SRun, 0);
	return p;
}

/*
 * It's possible to force single-core operation even
 * on a multiprocessor machine.
 */
static Proc*
singlerunproc(void)
{
	Schedq *rq;
	Proc *p;
	uint32_t start, now, skipscheds;
	int i;

	start = perfticks();
	skipscheds = 0;

	/* cooperative scheduling until the clock ticks */
	if((p=machp()->readied) && p->mach==0 && p->state==Ready
	&& run.runq[Nrq-1].head == nil && run.runq[Nrq-2].head == nil){
		skipscheds++;
		rq = &run.runq[p->priority];
		if(0)hi("runproc going to found before loop...\n");
		goto found;
	}

	run.preempts++;

loop:
	/*
	 * find a process that last ran on this processor (affinity),
	 * or one that hasn't moved in a while (load balancing). Every
	 * time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 * find the highest priority target process that this
		 * processor can run given affinity constraints.
		 */
		for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				if(p->mp == nil || p->mp == sys->machptr[machp()->machno]
				|| (!p->wired && i > 0)){
					if(0)hi("runproc going to found inside loop...\n");
					goto found;
				}
			}
		}

		/* waste time or halt the CPU */
		idlehands();
		/* remember how much time we're here */
		now = perfticks();
		machp()->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	if(0)hi("runproc into found...\n");
	p = dequeueproc(&run, rq, p);
	if(p == nil){
		if(0)hi("runproc p=nil :(\n");
		goto loop;
	}

	p->state = Scheding;
	if(0)hi("runproc, p->mp = sys->machptr[machp()->machno]\n");
	p->mp = sys->machptr[machp()->machno];
	if(0){hi("runproc, sys->machptr[machp()->machno] = "); put64((uint64_t)p->mp); hi("\n");}

	if(edflock(p)){
		edfrun(p, rq == &run.runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	if(p->trace)
		proctrace(p, SRun, 0);
	/* avoiding warnings, this will be removed */
	USED(mach0sched); USED(smprunproc);
	if(0){hi("runproc, returning p ");
		put64((uint64_t)p);
		hi("\n");}
	return p;
}

/*
 * pick a process to run.
 * most of this is used in AMP sched.
 * (on a quad core or less, we use SMP).
 * In the case of core 0 we always return nil, but
 * schedule the picked process at any other available TC.
 * In the case of other cores we wait until a process is given
 * by core 0.
 */
Proc*
runproc(void)
{
	Schedq *rq;
	Proc *p;
	uint32_t start, now;

	if(nosmp)
		return singlerunproc();
	//NIX modeset cannot work without halting every cpu at boot
	//if(sys->nmach <= AMPmincores)
	else
		return smprunproc();

	start = perfticks();
	run.preempts++;
	rq = nil;
	if(machp()->machno != 0){
		do{
			spllo();
			while(machp()->proc == nil)
				idlehands();
			now = perfticks();
			machp()->perf.inidle += now-start;
			start = now;
			splhi();
			p = machp()->proc;
		}while(p == nil);

		p->state = Scheding;
		p->mp = sys->machptr[machp()->machno];

		if(edflock(p)){
			edfrun(p, rq == &run.runq[PriEdf]);	/* start deadline timer and do admin */
			edfunlock();
		}
		if(p->trace)
			proctrace(p, SRun, 0);
		return p;
	}

	mach0sched();
	return nil;	/* not reached */
}

int
canpage(Proc *p)
{
	int ok;
	Sched *sch;

	splhi();
	sch = procsched(p);
	lock(sch);
	/* Only reliable way to see if we are Running */
	if(p->mach == 0) {
		p->newtlb = 1;
		ok = 1;
	}
	else
		ok = 0;
	unlock(sch);
	spllo();

	return ok;
}

Proc*
newproc(void)
{
	Proc *up = externup();
	Proc *p;

	p = psalloc();

	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->kp = 0;
	if(up != nil && up->procctl == Proc_tracesyscall)
		p->procctl = Proc_tracesyscall;
	else
		p->procctl = 0;
	p->syscalltrace = nil;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pshash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid <= 0 || p->noteid <= 0)
		panic("pidalloc");
	if(p->kstack == 0){
		p->kstack = smalloc(KSTACK);
		*(uintptr_t*)p->kstack = STACKGUARD;
	}

	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = sys->ticks*Scaling;
	p->edf = nil;

	p->ntrap = 0;
	p->nintr = 0;
	p->nsyscall = 0;
	p->nactrap = 0;
	p->nacsyscall = 0;
	p->nicc = 0;
	p->actime = 0ULL;
	p->tctime = 0ULL;
	p->ac = nil;
	p->nfullq = 0;
	p->req = nil;
	p->resp = nil;
	memset(&p->PMMU, 0, sizeof p->PMMU);

	return p;
}

/*
 * wire this proc to a machine
 */
void
procwired(Proc *p, int bm)
{
	Proc *up = externup();
	Proc *pp;
	int i;
	char nwired[MACHMAX];
	Mach *wm;

	if(bm < 0){
		/* pick a machine to wire to */
		memset(nwired, 0, sizeof(nwired));
		p->wired = 0;
		for(i=0; (pp = psincref(i)) != nil; i++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
			psdecref(pp);
		}
		bm = 0;
		for(i=0; i<sys->nmach; i++)
			if(nwired[i] < nwired[bm])
				bm = i;
	} else {
		/* use the virtual machine requested */
		bm = bm % sys->nmach;
	}

	p->wired = sys->machptr[bm];
	p->mp = p->wired;

	/*
	 * adjust our color to the new domain.
	 */
	if(up == nil || p != up)
		return;

	up->color = corecolor(up->mp->machno);
	qlock(&up->seglock);
	for(i = 0; i < NSEG; i++)
		if(up->seg[i])
			up->seg[i]->color = up->color;
	qunlock(&up->seglock);
}

void
procpriority(Proc *p, int pri, int fixed)
{
	if(pri >= Npriq)
		pri = Npriq - 1;
	else if(pri < 0)
		pri = 0;
	p->basepri = pri;
	p->priority = pri;
	if(fixed){
		p->fixedpri = 1;
	} else {
		p->fixedpri = 0;
	}
}

/*
 * sleep if a condition is not true. Another process will
 * awaken us after it sets the condition. When we awaken
 * the condition may no longer be true.
 *
 * we lock both the process and the rendezvous to keep r->p
 * and p->r synchronized.
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	Proc *up = externup();
	Mpl pl;

	pl = splhi();

	if(up->nlocks)
		print("process %d sleeps with %d locks held, last lock %#p locked at pc %#p, sleep called from %#p\n",
			up->pid, up->nlocks, up->lastlock, up->lastlock->_pc, getcallerpc(&r));
	lock(r);
	lock(&up->rlock);
	if(r->_p){
		print("double sleep called from %#p, %d %d\n",
			getcallerpc(&r), r->_p->pid, up->pid);
		dumpstack();
	}

	/*
	 * Wakeup only knows there may be something to do by testing
	 * r->p in order to get something to lock on.
	 * Flush that information out to memory in case the sleep is
	 * committed.
	 */
	r->_p = up;

	if((*f)(arg) || up->notepending){
		/*
		 * if condition happened or a note is pending
		 * never mind
		 */
		r->_p = nil;
		unlock(&up->rlock);
		unlock(r);
	} else {
		/*
		 * now we are committed to
		 * change state and call scheduler
		 */
		if(up->trace)
			proctrace(up, SSleep, 0);
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		machp()->cs++;

		procsave(up);
		mmuflushtlb(machp()->pml4->pa);
		if(setlabel(&up->sched)) {
			/*
			 * here when the process is awakened
			 */
			procrestore(up);
			spllo();
		} else {
			/*
			 * here to go to sleep (i.e. stop Running)
			 */
			unlock(&up->rlock);
			unlock(r);
			/*debug*/gotolabel(&machp()->sched);
		}
	}

	if(up->notepending) {
		up->notepending = 0;
		splx(pl);
		if(up->procctl == Proc_exitme && up->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}

	splx(pl);
}

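/*
 * A minimal sketch of the intended sleep/wakeup pairing (hypothetical
 * code, not part of this file). The sleeper re-tests the condition via
 * the function argument while holding the Rendez lock, and the waker
 * sets the condition before calling wakeup, so a wakeup that races with
 * the test cannot be lost:
 *
 *	static Rendez r;
 *	static int done;
 *
 *	static int
 *	isdone(void *v)
 *	{
 *		return done;
 *	}
 *
 *	sleeper:	sleep(&r, isdone, nil);
 *	waker:		done = 1; wakeup(&r);
 */
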
static int
tfn(void *arg)
{
	Proc *up = externup();
	return up->trend == nil || up->tfn(arg);
}

void
twakeup(Ureg* ureg, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}

void
tsleep(Rendez *r, int (*fn)(void*), void *arg, int32_t ms)
{
	Proc *up = externup();

	if (up->tt){
		print("tsleep: timer active: mode %d, tf %#p\n",
			up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = MS2NS(ms);
	up->tf = twakeup;
	up->tmode = Trelative;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if (up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}

/*
 * Expects that only one process can call wakeup for any given Rendez.
 * We hold both locks to ensure that r->p and p->r remain consistent.
 * Richard Miller has a better solution that doesn't require both to
 * be held simultaneously, but I'm a paranoid - presotto.
 */
Proc*
wakeup(Rendez *r)
{
	Mpl pl;
	Proc *p;

	pl = splhi();

	lock(r);
	p = r->_p;

	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r)
			panic("wakeup: state");
		r->_p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(r);

	splx(pl);

	return p;
}

/*
 * if waking a sleeping process, this routine must hold both
 * p->rlock and r->lock. However, it can't know them in
 * the same order as wakeup causing a possible lock ordering
 * deadlock. We break the deadlock by giving up the p->rlock
 * lock if we can't get the r->lock and retrying.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	Mpl pl;
	int ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);

	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;

	/* NIX */
	if(p->state == Exotic){
		/* it could be that the process is not running
		 * in the AC when we interrupt the AC, but then
		 * we'd only get an extra interrupt in the AC, and
		 * nothing should happen.
		 */
		intrac(p);
	}

	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		pl = splhi();
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->_p != p)
				panic("postnote: state %d %d %d", r->_p != p, p->r != r, p->state);
			p->r = nil;
			r->_p = nil;
			ready(p);
			unlock(r);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(pl);
		sched();
	}
	unlock(&p->rlock);
	splx(pl);

	if(p->state != Rendezvous){
		if(p->state == Semdown)
			ready(p);
		return ret;
	}

	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);

	return ret;
}

/*
 * weird thing: keep at most NBROKEN around
 */
#define NBROKEN 4
struct
{
	QLock;
	int n;
	Proc *p[NBROKEN];
} broken;

void
addbroken(Proc *p)
{
	Proc *up = externup();

	qlock(&broken);
	if(broken.n == NBROKEN) {
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken);

	stopac();
	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}

void
unbreak(Proc *p)
{
	int b;

	qlock(&broken);
	for(b=0; b < broken.n; b++)
		if(broken.p[b] == p) {
			broken.n--;
			memmove(&broken.p[b], &broken.p[b+1],
				sizeof(Proc*)*(NBROKEN-(b+1)));
			ready(p);
			break;
		}
	qunlock(&broken);
}

int
freebroken(void)
{
	int i, n;

	qlock(&broken);
	n = broken.n;
	for(i=0; i<n; i++) {
		ready(broken.p[i]);
		broken.p[i] = 0;
	}
	broken.n = 0;
	qunlock(&broken);
	return n;
}

void
pexit(char *exitstr, int freemem)
{
	Proc *up = externup();
	Proc *p;
	Segment **s, **es;
	int32_t utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;

	if(0 && up->nfullq > 0)
		iprint(" %s=%d", up->text, up->nfullq);
	if(0 && up->nicc > 0)
		iprint(" [%s nicc %ud tctime %ulld actime %ulld]\n",
			up->text, up->nicc, up->tctime, up->actime);
	if(up->syscalltrace != nil)
		free(up->syscalltrace);
	up->syscalltrace = nil;
	up->alarm = 0;

	if (up->tt)
		timerdel(up);
	if(up->trace)
		proctrace(up, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 * if not a kernel process and have a parent,
	 * do some housekeeping.
	 */
	if(up->kp == 0) {
		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			//die("bootprocessdeath");
			panic("boot process died: %s", exitstr);
		}

		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(sys->machptr[0]->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %d: %s",
				up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 * Check that parent is still alive.
		 */
		if(p->pid == up->parentpid && p->state != Broken) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;
			/*
			 * If there would be more than 128 wait records
			 * outstanding for my parent, then don't leave a wait
			 * record behind. This helps prevent badly written
			 * daemon processes from accumulating lots of wait
			 * records.
			 */
			if(p->nwait < 128) {
				wq->next = p->waitq;
				p->waitq = wq;
				p->nwait++;
				wq = nil;
				wakeup(&p->waitr);
			}
		}
		unlock(&p->exl);
		if(wq)
			free(wq);
	}

	if(!freemem)
		addbroken(up);

	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	psunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&pga);

	stopac();
	edfstop(up);
	up->state = Moribund;
	sched();
	panic("pexit");
}

int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}

int
pwait(Waitmsg *w)
{
	Proc *up = externup();
	int cpid;
	Waitq *wq;

	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);
	return cpid;
}

void
dumpaproc(Proc *p)
{
	uintptr_t bss;
	char *s;
	int sno;

	if(p == 0)
		return;

	bss = 0;
	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] != nil && (p->seg[sno]->type & SG_TYPE) == SG_BSS)
			bss = p->seg[sno]->top;

	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3d:%10s pc %#p dbgpc %#p %8s (%s) ut %ld st %ld bss %#p qpc %#p nl %d nd %lud lpc %#p pri %lud\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks,
		p->delaysched, p->lastlock ? p->lastlock->_pc : 0, p->priority);
}

void
procdump(void)
{
	Proc *up = externup();
	int i;
	Proc *p;

	if(up)
		print("up %d\n", up->pid);
	else
		print("no current process\n");
	for(i=0; (p = psincref(i)) != nil; i++) {
		if(p->state != Dead)
			dumpaproc(p);
		psdecref(p);
	}
}

/*
 * wait till all processes have flushed their mmu
 * state about segment s
 */
void
procflushseg(Segment *s)
{
	int i, ns, nm, nwait;
	Proc *p;
	Mach *mp;

	/*
	 * tell all processes with this
	 * segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; (p = psincref(i)) != nil; i++) {
		if(p->state == Dead){
			psdecref(p);
			continue;
		}
		for(ns = 0; ns < NSEG; ns++){
			if(p->seg[ns] == s){
				p->newtlb = 1;
				for(nm = 0; nm < MACHMAX; nm++)
					if((mp = sys->machptr[nm]) != nil && mp->online)
						if(mp->proc == p){
							mp->mmuflush = 1;
							nwait++;
						}
				break;
			}
		}
		psdecref(p);
	}

	if(nwait == 0)
		return;

	/*
	 * wait for all processors to take a clock interrupt
	 * and flush their mmu's.
	 * NIX BUG: this won't work if another core is in AC mode.
	 * In that case we must IPI it, but only if that core is
	 * using this segment.
	 */
	for(i = 0; i < MACHMAX; i++)
		if((mp = sys->machptr[i]) != nil && mp->online)
			if(mp != machp())
				while(mp->mmuflush)
					sched();
}

void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){
		if(rq->head == 0)
			continue;
		print("run[%ld]:", rq-run.runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %d(%lud)", p->pid, machp()->ticks - p->readytime);
		print("\n");
		delay(150);
	}
	print("nrdy %d\n", run.nrdy);
}

void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *up = externup();
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;
	p->noswap = 1;

	p->scallnr = up->scallnr;
	memmove(p->arg, up->arg, sizeof(up->arg));
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	if(p->dot)
		incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = 0;
	p->dbgreg = 0;

	procpriority(p, PriKproc, 0);

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = sys->ticks;
	ready(p);

	/*
	 * since the bss/data segments are now shareable,
	 * any mmu info about this process is now stale
	 * and has to be discarded.
	 */
	p->newtlb = 1;
	mmuflush();
}

/*
 * called splhi() by notify(). See comment in notify for the
 * reasoning.
 */
void
procctl(Proc *p)
{
	Proc *up = externup();
	Mpl pl;
	char *state;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		pl = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();
		p->state = Stopped;
		sched();
		p->psstate = state;
		splx(pl);
		return;

	case Proc_toac:
		p->procctl = 0;
		/*
		 * This pretends to return from the system call,
		 * by moving to a core, but never returns (unless
		 * the process gets moved back to a TC.)
		 */
		spllo();
		runacore();
		return;

	case Proc_totc:
		p->procctl = 0;
		if(p != up)
			panic("procctl: stopac: p != up");
		spllo();
		stopac();
		return;
	}
}

void
error(char *err)
{
	Proc *up = externup();

	spllo();
	assert(up->nerrlab < NERR);
	kstrcpy(up->errstr, err, ERRMAX);
	setlabel(&up->errlab[NERR-1]);
	nexterror();
}

void
nexterror(void)
{
	Proc *up = externup();
	/*debug*/gotolabel(&up->errlab[--up->nerrlab]);
}

void
exhausted(char *resource)
{
	char buf[ERRMAX];

	sprint(buf, "no free %s", resource);
	iprint("%s\n", buf);
	error(buf);
}

void
killbig(char *why)
{
	int i, x;
	Segment *s;
	uintptr_t l, max;
	Proc *p, *kp;

	max = 0;
	kp = nil;
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			if(kp != nil)
				psdecref(kp);
			kp = p;
			max = l;
		}
		else
			psdecref(p);
	}
	if(kp == nil)
		return;

	print("%d: %s killed: %s\n", kp->pid, kp->text, why);
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		/* TODO(aki): figure out what this was for. the oom killer is broken anyway though?
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
			p->procctl = Proc_exitbig;
		*/
		psdecref(p);
	}

	kp->procctl = Proc_exitbig;
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		if(s != 0 && canqlock(&s->lk)) {
			mfreeseg(s, s->base, (s->top - s->base)/BIGPGSZ);
			qunlock(&s->lk);
		}
	}
	psdecref(kp);
}

/*
 * change ownership to 'new' of all processes owned by 'old'. Used when
 * eve changes.
 */
void
renameuser(char *old, char *new)
{
	int i;
	Proc *p;

	for(i = 0; (p = psincref(i)) != nil; i++){
		if(p->user != nil && strcmp(old, p->user) == 0)
			kstrdup(&p->user, new);
		psdecref(p);
	}
}

/*
 * time accounting called by clock() splhi'd.
 * only cpu1 computes the system load average,
 * but the system load average is accounted to cpu0.
 */
void
accounttime(void)
{
	Proc *p;
	uint32_t n, per;

	p = machp()->proc;
	if(p) {
		if(machp()->machno == 1)
			run.nrun++;
		p->time[p->insyscall]++;
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - machp()->perf.last;
	machp()->perf.last = n;
	per = (machp()->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		machp()->perf.period = per;

	machp()->perf.avg_inidle = (machp()->perf.avg_inidle*(HZ-1)+machp()->perf.inidle)/HZ;
	machp()->perf.inidle = 0;

	machp()->perf.avg_inintr = (machp()->perf.avg_inintr*(HZ-1)+machp()->perf.inintr)/HZ;
	machp()->perf.inintr = 0;

	/* only one processor gets to compute system load averages.
	 * it has to be mach 1 when we use AMP.
	 */
	//if(sys->nmach > 1 && machp()->machno != 1)
	if(machp()->machno != 0)	//Change to non-AMP
		return;

	/*
	 * calculate decaying load average.
	 * if we decay by (n-1)/n then it takes
	 * n clock ticks to go from load L to .36 L once
	 * things quiet down. it takes about 5 n clock
	 * ticks to go to zero. so using HZ means this is
	 * approximately the load over the last second,
	 * with a tail lasting about 5 seconds.
	 */
	n = run.nrun;
	run.nrun = 0;
	n = (run.nrdy+n)*1000;
	sys->load = (sys->load*(HZ-1)+n)/HZ;
}

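/*
 * Worked example with assumed numbers (illustrative only): with HZ = 100,
 * a tick on which one process is running (n = 1) and one more is ready
 * (run.nrdy = 1) contributes n = (1+1)*1000 = 2000, so each tick computes
 * sys->load = (sys->load*99 + 2000)/100, converging on 2000 (a load of
 * 2.0) with roughly the 5 second tail the comment above describes.
 */
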
void
halt(void)
{
	if(run.nrdy != 0)
		return;
	hardhalt();
}