proc.c 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include "../port/lib.h"
  11. #include "mem.h"
  12. #include "dat.h"
  13. #include "fns.h"
  14. #include "../port/error.h"
  15. #include "../port/edf.h"
  16. #include "errstr.h"
  17. #include <trace.h>
extern int nosmp;		/* boot flag: force the single-core scheduler */

enum
{
	Scaling=2,		/* tick scaling factor used by updatecpu() */
	AMPmincores = 5,	/* at or below this core count, SMP sched is used */
};

Ref noteidalloc;		/* allocator for note-group ids */
static Ref pidalloc;		/* allocator for process ids */
static Sched run;		/* the single system-wide run queue */
struct Procalloc procalloc;

/* process-table helpers, defined in the ps code */
extern Proc* psalloc(void);
extern void pshash(Proc*);
extern void psrelease(Proc*);
extern void psunhash(Proc*);

static int reprioritize(Proc*);
static void updatecpu(Proc*);
static void rebalance(void);
/*
 * Printable names for Proc.state values, indexed by the state enum;
 * the order must match the state constants (Dead..Down) declared in dat.h.
 */
char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
	"Exotic",
	"Down",
};
#if 0
/*
 * Disabled debugging wrapper for gotolabel(): prints the target
 * pid/pc/sp before jumping and dies if the saved pc is zero.
 * Kept as an example of how to trap a specific process at startup.
 */
void
debuggotolabel(Label *p)
{
	Mach *m = machp();
	if(0)hi("debuggotolabel");
	iprint("gotolabel: pid %p rip %p sp %p\n",
		m && m->externup? m->externup->pid : 0,
		(void *)p->pc,
		(void *)p->sp);
	/*
	*/
	if (!p->pc)
		die("PC IS ZERO!");
	/* this is an example of putting a breakpoint
	 * here so we can capture a particular process.
	 * startup is very deterministic so this can
	 * be very useful. You can then attach with
	 * gdb and single step. In practice this helped us show
	 * that our return stack for sysrforkret was bogus.
	if (m && m->externup && m->externup->pid == 6)
		die("PID 6\n");
	*/
	gotolabel(p);
}
#endif
  79. Sched*
  80. procsched(Proc *p)
  81. {
  82. return &run;
  83. }
  84. /*
  85. * bad planning, once more.
  86. */
  87. void
  88. procinit0(void)
  89. {
  90. run.schedgain = 30;
  91. }
/*
 * Always splhi()'ed.
 * Per-core scheduler loop entry; never returns.  Each time a process
 * gives up the core, sched() longjmps back to the setlabel() below,
 * where the previous process (if any) is retired before picking a
 * new one via sched().
 */
void
schedinit(void)		/* never returns */
{
	Mach *m = machp();
	Edf *e;

	m->inidle = 1;
	m->proc = nil;
	ainc(&run.nmach);

	/* sched() jumps back here via m->sched */
	setlabel(&m->sched);
	if(m->externup) {
		/* close out the EDF accounting for the slice that just ended */
		if((e = m->externup->edf) && (e->flags & Admitted))
			edfrecord(m->externup);
		m->qstart = 0;
		m->qexpired = 0;
		coherence();
		m->proc = 0;
		switch(m->externup->state) {
		case Running:
			/* still runnable: put it back on the run queue */
			ready(m->externup);
			break;
		case Moribund:
			/* process is exiting: free its scheduling state */
			m->externup->state = Dead;
			stopac();
			edfstop(m->externup);
			if (m->externup->edf)
				free(m->externup->edf);
			m->externup->edf = nil;

			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	pga
			 */
			mmurelease(m->externup);
			unlock(&pga);

			psrelease(m->externup);
			unlock(&procalloc);
			break;
		}
		/* updatecpu() must run before m->externup changes hands */
		m->externup->mach = nil;
		updatecpu(m->externup);
		m->externup = nil;
	}
	sched();
}
/*
 * If changing this routine, look also at sleep(). It
 * contains a copy of the guts of sched().
 *
 * Give up the processor: save the current process (if any) and jump
 * back to schedinit(), which retires it and picks the next one.
 */
void
sched(void)
{
	Mach *m = machp();
	Proc *p;

	/* rescheduling while holding an ilock would deadlock the kernel */
	if(m->ilockdepth)
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
			m->machno,
			m->ilockdepth,
			m->externup? m->externup->lastilock: nil,
			(m->externup && m->externup->lastilock)? m->externup->lastilock->_pc: 0,
			getcallerpc(&p+2));

	kstackok();
	if(m->externup){
		/*
		 * Delay the sched until the process gives up the locks
		 * it is holding.  This avoids dumb lock loops.
		 * Don't delay if the process is Moribund.
		 * It called sched to die.
		 * But do sched eventually.  This avoids a missing unlock
		 * from hanging the entire kernel.
		 * But don't reschedule procs holding palloc or procalloc.
		 * Those are far too important to be holding while asleep.
		 *
		 * This test is not exact.  There can still be a few
		 * instructions in the middle of taslock when a process
		 * holds a lock but Lock.p has not yet been initialized.
		 */
		if(m->externup->nlocks)
		if(m->externup->state != Moribund)
		if(m->externup->delaysched < 20
		|| pga.Lock.p == m->externup
		|| procalloc.Lock.p == m->externup){
			m->externup->delaysched++;
			run.delayedscheds++;
			return;
		}
		m->externup->delaysched = 0;

		splhi();

		/* statistics */
		if(m->externup->nqtrap == 0 && m->externup->nqsyscall == 0)
			m->externup->nfullq++;
		m->cs++;

		procsave(m->externup);
		mmuflushtlb(m->pml4->pa);
		/* execution resumes here when this process is next scheduled */
		if(setlabel(&m->externup->sched)){
			procrestore(m->externup);
			spllo();
			return;
		}
		/* jump to schedinit()'s setlabel(&m->sched) to retire this proc */
		/*debug*/gotolabel(&m->sched);
	}

	m->inidle = 1;
	p = runproc();	/* core 0 never returns */
	m->inidle = 0;

	/* non-EDF procs get a fresh priority from their cpu usage */
	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	if(nosmp){
		if(p != m->readied)
			m->schedticks = m->ticks + HZ/10;
		m->readied = 0;
	}
	m->externup = p;
	m->qstart = m->ticks;
	m->externup->nqtrap = 0;
	m->externup->nqsyscall = 0;
	m->externup->state = Running;
	//m->externup->mach = m;
	m->externup->mach = sys->machptr[m->machno];
	m->proc = m->externup;
//	iprint("m->externup->sched.sp %p * %p\n", up->sched.sp,
//		*(void **) m->externup->sched.sp);
	mmuswitch(m->externup);

	assert(!m->externup->wired || m->externup->wired == m);
	if (0) hi("gotolabel\n");
	/* resume the chosen process at its saved label */
	/*debug*/gotolabel(&m->externup->sched);
}
  222. int
  223. anyready(void)
  224. {
  225. return run.runvec;
  226. }
  227. int
  228. anyhigher(void)
  229. {
  230. Mach *m = machp();
  231. return run.runvec & ~((1<<(m->externup->priority+1))-1);
  232. }
/*
 * here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	Mach *m = machp();

	/* once a second, rebalance will reprioritize ready procs */
	if(m->machno == 0){
		rebalance();
		return;
	}

	/* with <= 4 cores, we use SMP and core 0 does not set qexpired for us */
	/* NOTE(review): AMPmincores is 5 but the comment above says 4 —
	 * confirm the intended threshold. */
	if(sys->nmach <= AMPmincores)
		if(m->ticks - m->qstart >= HZ/10)
			m->qexpired = 1;

	/* unless preempted, get to run */
	/* delaysched forces a sched() on the way back out of the kernel */
	if(m->qexpired && anyready())
		m->externup->delaysched++;
}
#if 0
/*
 * Disabled earlier variant of hzsched(), kept for reference:
 * preempts when a higher-priority proc is ready or after a
 * 100ms time slice.
 */
void
hzsched(void)
{
	Mach *m = machp();
	/* once a second, rebalance will reprioritize ready procs */
	if(m->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	if(anyhigher()
	|| (!m->externup->fixedpri && m->ticks > m->schedticks && anyready())){
		m->readied = nil;	/* avoid cooperative scheduling */
		m->externup->delaysched++;
	}
}
#endif
/*
 * here at the end of non-clock interrupts to see if we should preempt the
 * current process. Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	Mach *m = machp();
	if(m->externup && m->externup->state == Running)
	if(m->externup->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		/* Core 0 is dispatching all interrupts, so no core
		 * actually running a user process is ever going call preempted, unless
		 * we consider IPIs for preemption or we distribute interrupts.
		 * But we are going to use SMP for machines with few cores.
		panic("preemted used");
		*/

		/* the flag stops a nested interrupt from preempting again */
		m->externup->preempted = 1;
		sched();
		splhi();
		m->externup->preempted = 0;
		return 1;
	}
	return 0;
}
#if 0
/*
 * Disabled earlier variant of preempted(), kept for reference; it
 * additionally cleared m->readied to defeat cooperative scheduling.
 */
int
preempted(void)
{
	Mach *m = machp();
	if(m->externup && m->externup->state == Running)
	if(m->externup->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		m->readied = nil;	/* avoid cooperative scheduling */
		m->externup->preempted = 1;
		sched();
		splhi();
		m->externup->preempted = 0;
		return 1;
	}
	return 0;
}
#endif
  314. /*
  315. * Update the cpu time average for this particular process,
  316. * which is about to change from up -> not up or vice versa.
  317. * p->lastupdate is the last time an updatecpu happened.
  318. *
  319. * The cpu time average is a decaying average that lasts
  320. * about D clock ticks. D is chosen to be approximately
  321. * the cpu time of a cpu-intensive "quick job". A job has to run
  322. * for approximately D clock ticks before we home in on its
  323. * actual cpu usage. Thus if you manage to get in and get out
  324. * quickly, you won't be penalized during your burst. Once you
  325. * start using your share of the cpu for more than about D
  326. * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
  327. * below all the other quick jobs. Interactive tasks, because
  328. * they basically always use less than their fair share of cpu,
  329. * will be rewarded.
  330. *
  331. * If the process has not been running, then we want to
  332. * apply the filter
  333. *
  334. * cpu = cpu * (D-1)/D
  335. *
  336. * n times, yielding
  337. *
  338. * cpu = cpu * ((D-1)/D)^n
  339. *
  340. * but D is big enough that this is approximately
  341. *
  342. * cpu = cpu * (D-n)/D
  343. *
  344. * so we use that instead.
  345. *
  346. * If the process has been running, we apply the filter to
  347. * 1 - cpu, yielding a similar equation. Note that cpu is
  348. * stored in fixed point (* 1000).
  349. *
  350. * Updatecpu must be called before changing up, in order
  351. * to maintain accurate cpu usage statistics. It can be called
  352. * at any time to bring the stats for a given proc up-to-date.
  353. */
static void
updatecpu(Proc *p)
{
	Mach *m = machp();
	int D, n, t, ocpu;

	/* EDF procs are scheduled by deadline, not by cpu share */
	if(p->edf)
		return;

	//t = sys->ticks*Scaling + Scaling/2;
	t = sys->machptr[0]->ticks*Scaling + Scaling/2; //Originally MACHP(0)
	n = t - p->lastupdate;	/* scaled ticks since last update */
	p->lastupdate = t;

	if(n == 0)
		return;
	/* D is the decay window (see the comment above); clamp n so the
	 * linear approximation (D-n)/D never goes negative */
	D = run.schedgain*HZ*Scaling;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != m->externup)
		p->cpu = (ocpu*(D-n))/D;	/* not running: decay cpu toward 0 */
	else{
		/* running: decay (1000 - cpu) instead, pushing cpu toward 1000 */
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}

//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
}
  380. /*
  381. * On average, p has used p->cpu of a cpu recently.
  382. * Its fair share is nmach/m->load of a cpu. If it has been getting
  383. * too much, penalize it. If it has been getting not enough, reward it.
  384. * I don't think you can get much more than your fair share that
  385. * often, so most of the queues are for using less. Having a priority
  386. * of 3 means you're just right. Having a higher priority (up to p->basepri)
  387. * means you're not using as much as you could.
  388. */
  389. static int
  390. reprioritize(Proc *p)
  391. {
  392. int fairshare, n, load, ratio;
  393. load = sys->load;
  394. if(load == 0)
  395. return p->basepri;
  396. /*
  397. * fairshare = 1.000 * conf.nproc * 1.000/load,
  398. * except the decimal point is moved three places
  399. * on both load and fairshare.
  400. */
  401. fairshare = (sys->nmach*1000*1000)/load;
  402. n = p->cpu;
  403. if(n == 0)
  404. n = 1;
  405. ratio = (fairshare+n/2) / n;
  406. if(ratio > p->basepri)
  407. ratio = p->basepri;
  408. if(ratio < 0)
  409. panic("reprioritize");
  410. //iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
  411. return ratio;
  412. }
/*
 * add a process to a scheduling queue
 * The caller passes locked != 0 when it already holds sch's lock.
 */
static void
queueproc(Sched *sch, Schedq *rq, Proc *p, int locked)
{
	int pri;

	pri = rq - sch->runq;	/* queue index doubles as the priority */
	if(!locked)
		lock(sch);
	else if(canlock(sch))
		panic("queueproc: locked and can lock");
	p->priority = pri;
	/* append at the tail: FIFO within a priority level */
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	sch->nrdy++;
	sch->runvec |= 1<<pri;	/* advertise a runnable proc at this level */
	if(!locked)
		unlock(sch);
}
/*
 * try to remove a process from a scheduling queue (called splhi)
 * Returns tp on success; nil when the lock is contended, tp is no
 * longer on rq, or tp's state is still live on some core (p->mach set).
 */
Proc*
dequeueproc(Sched *sch, Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	/* never spin at splhi; let the caller retry instead */
	if(!canlock(sch))
		return nil;

	/*
	 * the queue may have changed before we locked runq,
	 * refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;	/* l trails p: predecessor for the unlink below */
	}

	/*
	 * p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(sch);
		return nil;
	}
	/* unlink p from the singly-linked queue */
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		sch->runvec &= ~(1<<(rq-sch->runq));	/* queue is now empty */
	rq->n--;
	sch->nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %d %s\n", p->text, p->pid, statename[p->state]);

	unlock(sch);
	return p;
}
/*
 * Mark p Ready and queue it on sch at a freshly computed priority.
 * locked != 0 means the caller already holds sch's lock.
 */
static void
schedready(Sched *sch, Proc *p, int locked)
{
	Mpl pl;
	int pri;
	Schedq *rq;

	pl = splhi();
	/* EDF-admitted procs are dispatched by the deadline scheduler */
	if(edfready(p)){
		splx(pl);
		return;
	}
/*	if(m->externup != p)
		m->readied = p;	*/	/* group scheduling, will be removed */
	/* refresh the cpu average, then queue at the resulting priority */
	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &sch->runq[pri];
	p->state = Ready;
	queueproc(sch, rq, p, locked);
	if(p->trace)
		proctrace(p, SReady, 0);
	splx(pl);
}
  502. /*
  503. * ready(p) picks a new priority for a process and sticks it in the
  504. * runq for that priority.
  505. */
  506. void
  507. ready(Proc *p)
  508. {
  509. schedready(procsched(p), p, 0);
  510. }
  511. /*
  512. * yield the processor and drop our priority
  513. */
  514. void
  515. yield(void)
  516. {
  517. Mach *m = machp();
  518. if(anyready()){
  519. /* pretend we just used 1/2 tick */
  520. m->externup->lastupdate -= Scaling/2;
  521. sched();
  522. }
  523. }
/*
 * recalculate priorities once a second. We need to do this
 * since priorities will otherwise only be recalculated when
 * the running process blocks.
 */
static void
rebalance(void)
{
	Mach *m = machp();
	Mpl pl;
	int pri, npri, t;
	Schedq *rq;
	Proc *p;

	/* rate-limit to once per second */
	t = m->ticks;
	if(t - run.balancetime < HZ)
		return;
	run.balancetime = t;

	for(pri=0, rq=run.runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		/* only the head proc of each queue is examined per pass */
		if(p == nil)
			continue;
		/* only touch procs whose last core is this one */
		if(p->mp != sys->machptr[m->machno]) //MACHP(m->machno)
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			/* move it to its new queue, then re-examine this queue's
			 * (new) head before advancing */
			pl = splhi();
			p = dequeueproc(&run, rq, p);
			if(p)
				queueproc(&run, &run.runq[npri], p, 0);
			splx(pl);
			goto another;
		}
	}
}
  562. /*
  563. * Process p is ready to run, but there's no available core.
  564. * Try to make a core available by
  565. * 1. preempting a process with lower priority, or
  566. * 2. preempting one with the same priority that had more than HZ/10, or
  567. * 3. rescheduling one that run more than HZ, in the hope he gets his priority lowered.
  568. */
  569. static void
  570. preemptfor(Proc *p)
  571. {
  572. Mach *m = machp();
  573. uint32_t delta;
  574. uint i, /*j,*/ rr;
  575. Proc *mup;
  576. Mach *mp;
  577. assert(m->machno == 0);
  578. /*
  579. * try to preempt a lower priority process first, default back to
  580. * round robin otherwise.
  581. */
  582. for(rr = 0; rr < 2; rr++)
  583. for(i = 0; i < MACHMAX; i++){
  584. /*j = pickcore(p->color, i);
  585. if((mp = sys->machptr[j]) != nil && mp->online && mp->nixtype == NIXTC){*/
  586. if((mp = sys->machptr[i]) != nil && mp->online && mp->nixtype == NIXTC){
  587. if(mp == m)
  588. continue;
  589. /*
  590. * Caution here: mp->proc can change, even die.
  591. */
  592. mup = mp->proc;
  593. if(mup == nil) /* one got idle */
  594. return;
  595. delta = mp->ticks - mp->qstart;
  596. if(m->externup->priority < p->priority){
  597. mp->qexpired = 1;
  598. return;
  599. }
  600. if(rr && m->externup->priority == p->priority && delta > HZ/10){
  601. mp->qexpired = 1;
  602. return;
  603. }
  604. if(rr & delta > HZ){
  605. mp->qexpired = 1;
  606. return;
  607. }
  608. }
  609. }
  610. }
/*
 * Scheduling thread run as the main loop of cpu 0
 * Used in AMP sched.
 * Core 0 never runs user processes itself: it loops forever, picking
 * ready processes and handing each to an idle time-sharing core by
 * storing it in that core's mp->proc (which runproc() on that core
 * is polling).
 */
static void
mach0sched(void)
{
	Mach *m = machp();
	Schedq *rq;
	Proc *p;
	Mach *mp;
	uint32_t start, now;
	int n, i;	//, j;

	assert(m->machno == 0);
	acmodeset(NIXKC);		/* we don't time share any more */
	n = 0;			/* passes spent idle; >0 relaxes affinity */
	start = perfticks();
loop:
	/*
	 * find a ready process that we might run.
	 */
	spllo();
	for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--)
		for(p = rq->head; p; p = p->rnext){
			/*
			 * wired processes may only run when their core is available.
			 */
			if(p->wired != nil){
				if(p->wired->proc == nil)
					goto found;
				continue;
			}
			/*
			 * find a ready process that did run at an available core
			 * or one that has not moved for some time.
			 */
			if(p->mp == nil || p->mp->proc == nil || n>0){
				goto found;
			}
		}
	/* waste time or halt the CPU */
	idlehands();
	/* remember how much time we're here */
	now = perfticks();
	m->perf.inidle += now-start;
	start = now;
	n++;
	goto loop;

found:
	assert(m->machno == 0);
	splhi();
	/*
	 * find a core for this process, but honor wiring.
	 */
	mp = p->wired;
	if(mp != nil){
		if(mp->proc != nil)
			goto loop;	/* wired core busy: look again */
	}else{
		for(i = 0; i < MACHMAX; i++){
			/*j = pickcore(p->color, i);
			if((mp = sys->machptr[j]) != nil && mp->online && mp->nixtype == NIXTC){*/
			if((mp = sys->machptr[i]) != nil){ // && mp->online && mp->nixtype == NIXTC){
				if(mp != m && mp->proc == nil)
					break;
			}
		}
		if(i == MACHMAX){
			/* every core busy: ask one to give up its proc */
			preemptfor(p);
			goto loop;
		}
	}

	/* dequeue can fail if someone else grabbed p meanwhile */
	p = dequeueproc(&run, rq, p);
	mp->proc = p;
	if(p != nil){
		p->state = Scheding;
		p->mp = mp;
	}

	n = 0;
	goto loop;
}
  692. /*
  693. * SMP performs better than AMP with few cores.
  694. * So, leave this here by now. We should probably
  695. * write a unified version of runproc good enough for
  696. * both SMP and AMP.
  697. */
  698. static Proc*
  699. smprunproc(void)
  700. {
  701. Mach *m = machp();
  702. Schedq *rq;
  703. Proc *p;
  704. uint32_t start, now;
  705. int i;
  706. start = perfticks();
  707. run.preempts++;
  708. loop:
  709. /*
  710. * find a process that last ran on this processor (affinity),
  711. * or one that hasn't moved in a while (load balancing). Every
  712. * time around the loop affinity goes down.
  713. */
  714. spllo();
  715. for(i = 0;; i++){
  716. /*
  717. * find the highest priority target process that this
  718. * processor can run given affinity constraints.
  719. *
  720. */
  721. for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){
  722. for(p = rq->head; p; p = p->rnext){
  723. if(p->mp == nil || p->mp == sys->machptr[m->machno]
  724. || (!p->wired && i > 0))
  725. goto found;
  726. }
  727. }
  728. /* waste time or halt the CPU */
  729. idlehands();
  730. /* remember how much time we're here */
  731. now = perfticks();
  732. m->perf.inidle += now-start;
  733. start = now;
  734. }
  735. found:
  736. splhi();
  737. p = dequeueproc(&run, rq, p);
  738. if(p == nil)
  739. goto loop;
  740. p->state = Scheding;
  741. p->mp = sys->machptr[m->machno];
  742. if(edflock(p)){
  743. edfrun(p, rq == &run.runq[PriEdf]); /* start deadline timer and do admin */
  744. edfunlock();
  745. }
  746. if(p->trace)
  747. proctrace(p, SRun, 0);
  748. return p;
  749. }
  750. /*
  751. * It's possible to force to single core even
  752. * in a multiprocessor machine
  753. */
  754. static Proc*
  755. singlerunproc(void)
  756. {
  757. Mach *m = machp();
  758. Schedq *rq;
  759. Proc *p;
  760. uint32_t start, now, skipscheds;
  761. int i;
  762. start = perfticks();
  763. /* cooperative scheduling until the clock ticks */
  764. if((p=m->readied) && p->mach==0 && p->state==Ready
  765. && &run.runq[Nrq-1].head == nil && &run.runq[Nrq-2].head == nil){
  766. skipscheds++;
  767. rq = &run.runq[p->priority];
  768. if(0)hi("runproc going to found before loop...\n");
  769. goto found;
  770. }
  771. run.preempts++;
  772. loop:
  773. /*
  774. * find a process that last ran on this processor (affinity),
  775. * or one that hasn't moved in a while (load balancing). Every
  776. * time around the loop affinity goes down.
  777. */
  778. spllo();
  779. for(i = 0;; i++){
  780. /*
  781. * find the highest priority target process that this
  782. * processor can run given affinity constraints.
  783. *
  784. */
  785. for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){
  786. for(p = rq->head; p; p = p->rnext){
  787. if(p->mp == nil || p->mp == sys->machptr[m->machno]
  788. || (!p->wired && i > 0))
  789. {
  790. if(0)hi("runproc going to found inside loop...\n");
  791. goto found;
  792. }
  793. }
  794. }
  795. /* waste time or halt the CPU */
  796. idlehands();
  797. /* remember how much time we're here */
  798. now = perfticks();
  799. m->perf.inidle += now-start;
  800. start = now;
  801. }
  802. found:
  803. splhi();
  804. if(0)hi("runproc into found...\n");
  805. p = dequeueproc(&run, rq, p);
  806. if(p == nil)
  807. {
  808. if(0)hi("runproc p=nil :(\n");
  809. goto loop;
  810. }
  811. p->state = Scheding;
  812. if(0)hi("runproc, pm->mp = sys->machptr[m->machno]\n");
  813. p->mp = sys->machptr[m->machno];
  814. if(0){hi("runproc, sys->machptr[m->machno] = "); put64((uint64_t)p->mp); hi("\n");}
  815. if(edflock(p)){
  816. edfrun(p, rq == &run.runq[PriEdf]); /* start deadline timer and do admin */
  817. edfunlock();
  818. }
  819. if(p->trace)
  820. proctrace(p, SRun, 0);
  821. /* avoiding warnings, this will be removed */
  822. USED(mach0sched); USED(smprunproc);
  823. if(0){hi("runproc, returning p ");
  824. put64((uint64_t)p);
  825. hi("\n");}
  826. return p;
  827. }
/*
 * pick a process to run.
 * most of this is used in AMP sched.
 * (on a quad core or less, we use SMP).
 * In the case of core 0 we always return nil, but
 * schedule the picked process at any other available TC.
 * In the case of other cores we wait until a process is given
 * by core 0.
 */
Proc*
runproc(void)
{
	Mach *m = machp();
	Schedq *rq;
	Proc *p;
	uint32_t start, now;

	/*
	 * NOTE(review): both branches below return, so everything after
	 * the if/else (the AMP path) is currently unreachable; it is kept
	 * while NIX mode-setting is disabled (see comment below).
	 */
	if(nosmp)
		return singlerunproc();
	//NIX modeset cannot work without halt every cpu at boot
	//if(sys->nmach <= AMPmincores)
	else
		return smprunproc();

	start = perfticks();
	run.preempts++;
	rq = nil;
	if(m->machno != 0){
		/* non-0 cores: idle until core 0 deposits a proc in m->proc */
		do{
			spllo();
			while(m->proc == nil)
				idlehands();
			now = perfticks();
			m->perf.inidle += now-start;
			start = now;
			splhi();
			p = m->proc;
		}while(p == nil);
		p->state = Scheding;
		p->mp = sys->machptr[m->machno];

		if(edflock(p)){
			edfrun(p, rq == &run.runq[PriEdf]);	/* start deadline timer and do admin */
			edfunlock();
		}
		if(p->trace)
			proctrace(p, SRun, 0);
		return p;
	}

	/* core 0 becomes the AMP scheduler and never returns */
	mach0sched();
	return nil;	/* not reached */
}
  877. int
  878. canpage(Proc *p)
  879. {
  880. int ok;
  881. Sched *sch;
  882. splhi();
  883. sch = procsched(p);
  884. lock(sch);
  885. /* Only reliable way to see if we are Running */
  886. if(p->mach == 0) {
  887. p->newtlb = 1;
  888. ok = 1;
  889. }
  890. else
  891. ok = 0;
  892. unlock(sch);
  893. spllo();
  894. return ok;
  895. }
/*
 * Allocate and initialize a fresh process in state Scheding.
 * The caller fills in segments, text/user names, etc. and then
 * calls ready().  Syscall tracing is inherited from the creating
 * process, if there is one.
 */
Proc*
newproc(void)
{
	Mach *m = machp();
	Proc *p;

	p = psalloc();
	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->kp = 0;
	/* inherit syscall tracing from the current process */
	if(m->externup != nil && m->externup->procctl == Proc_tracesyscall)
		p->procctl = Proc_tracesyscall;
	else
		p->procctl = 0;
	p->syscalltrace = nil;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pshash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid <= 0 || p->noteid <= 0)
		panic("pidalloc");
	/* the kernel stack survives reuse of the Proc slot */
	if(p->kstack == 0){
		p->kstack = smalloc(KSTACK);
		*(uintptr_t*)p->kstack = STACKGUARD;	/* stack-overflow canary */
	}
	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = sys->ticks*Scaling;
	p->edf = nil;
	/* NIX per-proc accounting counters (TC vs AC split) */
	p->ntrap = 0;
	p->nintr = 0;
	p->nsyscall = 0;
	p->nactrap = 0;
	p->nacsyscall = 0;
	p->nicc = 0;
	p->actime = 0ULL;
	p->tctime = 0ULL;
	p->ac = nil;
	p->nfullq = 0;
	p->req = nil;
	p->resp = nil;
	memset(&p->PMMU, 0, sizeof p->PMMU);
	return p;
}
/*
 * wire this proc to a machine
 *
 * bm < 0 selects the machine with the fewest wired processes;
 * otherwise bm is used modulo the machine count.  If the caller
 * wires itself, its memory color (and its segments') is moved to
 * the new core's domain.
 */
void
procwired(Proc *p, int bm)
{
	Mach *m = machp();
	Proc *pp;
	int i;
	char nwired[MACHMAX];
	Mach *wm;

	if(bm < 0){
		/* pick a machine to wire to: count wired procs per core */
		memset(nwired, 0, sizeof(nwired));
		p->wired = 0;
		for(i=0; (pp = psincref(i)) != nil; i++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
			psdecref(pp);
		}
		/* choose the least-loaded core */
		bm = 0;
		for(i=0; i<sys->nmach; i++)
			if(nwired[i] < nwired[bm])
				bm = i;
	} else {
		/* use the virtual machine requested */
		bm = bm % sys->nmach;
	}
	p->wired = sys->machptr[bm];
	p->mp = p->wired;
	/*
	 * adjust our color to the new domain.
	 */
	if(m->externup == nil || p != m->externup)
		return;
	m->externup->color = corecolor(m->externup->mp->machno);
	qlock(&m->externup->seglock);
	for(i = 0; i < NSEG; i++)
		if(m->externup->seg[i])
			m->externup->seg[i]->color = m->externup->color;
	qunlock(&m->externup->seglock);
}
  1012. void
  1013. procpriority(Proc *p, int pri, int fixed)
  1014. {
  1015. if(pri >= Npriq)
  1016. pri = Npriq - 1;
  1017. else if(pri < 0)
  1018. pri = 0;
  1019. p->basepri = pri;
  1020. p->priority = pri;
  1021. if(fixed){
  1022. p->fixedpri = 1;
  1023. } else {
  1024. p->fixedpri = 0;
  1025. }
  1026. }
/*
 * sleep if a condition is not true. Another process will
 * awaken us after it sets the condition. When we awaken
 * the condition may no longer be true.
 *
 * we lock both the process and the rendezvous to keep r->p
 * and p->r synchronized.
 *
 * If a note is pending when we commit (or arrives while asleep),
 * the sleep is aborted with error(Eintr).
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	Mach *m = machp();
	Mpl pl;

	pl = splhi();
	/* sleeping with spinlocks held is a deadlock in the making */
	if(m->externup->nlocks)
		print("process %d sleeps with %d locks held, last lock %#p locked at pc %#p, sleep called from %#p\n",
			m->externup->pid, m->externup->nlocks, m->externup->lastlock, m->externup->lastlock->_pc, getcallerpc(&r));
	lock(r);
	lock(&m->externup->rlock);
	if(r->_p){
		/* another process is already parked on this Rendez */
		print("double sleep called from %#p, %d %d\n",
			getcallerpc(&r), r->_p->pid, m->externup->pid);
		dumpstack();
	}
	/*
	 * Wakeup only knows there may be something to do by testing
	 * r->p in order to get something to lock on.
	 * Flush that information out to memory in case the sleep is
	 * committed.
	 */
	r->_p = m->externup;
	if((*f)(arg) || m->externup->notepending){
		/*
		 * if condition happened or a note is pending
		 * never mind
		 */
		r->_p = nil;
		unlock(&m->externup->rlock);
		unlock(r);
	} else {
		/*
		 * now we are committed to
		 * change state and call scheduler
		 */
		if(m->externup->trace)
			proctrace(m->externup, SSleep, 0);
		m->externup->state = Wakeme;
		m->externup->r = r;
		/* statistics */
		m->cs++;
		procsave(m->externup);
		/* NOTE(review): full TLB flush on every sleep -- presumably
		 * required by this port's context-switch path; confirm. */
		mmuflushtlb(m->pml4->pa);
		if(setlabel(&m->externup->sched)) {
			/*
			 * here when the process is awakened
			 */
			procrestore(m->externup);
			spllo();
		} else {
			/*
			 * here to go to sleep (i.e. stop Running)
			 */
			unlock(&m->externup->rlock);
			unlock(r);
			/* jump into the per-core scheduler context */
			/*debug*/gotolabel(&m->sched);
		}
	}
	/* a note (e.g. kill/interrupt) aborts the sleep */
	if(m->externup->notepending) {
		m->externup->notepending = 0;
		splx(pl);
		if(m->externup->procctl == Proc_exitme && m->externup->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}
	splx(pl);
}
  1103. static int
  1104. tfn(void *arg)
  1105. {
  1106. Mach *m = machp();
  1107. return m->externup->trend == nil || m->externup->tfn(arg);
  1108. }
/*
 * Timer callback for tsleep(): wake the process parked on its
 * temporary rendezvous.  trend is cleared before the wakeup so
 * tfn() observes that the timeout fired even if the wakeup is
 * not needed.
 * NOTE(review): p->trend is read and cleared without a lock here;
 * presumably safe against tsleep's ordering -- confirm.
 */
void
twakeup(Ureg* ureg, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}
/*
 * Sleep on r until fn(arg) becomes true or ms milliseconds elapse.
 * A one-shot relative timer (twakeup) provides the timeout; it is
 * removed again on both normal return and error unwind.
 */
void
tsleep(Rendez *r, int (*fn)(void*), void *arg, int32_t ms)
{
	Mach *m = machp();

	/* a leftover timer indicates a bug in the previous user */
	if (m->externup->tt){
		print("tsleep: timer active: mode %d, tf %#p\n",
			m->externup->tmode, m->externup->tf);
		timerdel(m->externup);
	}
	m->externup->tns = MS2NS(ms);
	m->externup->tf = twakeup;
	m->externup->tmode = Trelative;
	m->externup->ta = m->externup;
	m->externup->trend = r;
	m->externup->tfn = fn;
	timeradd(m->externup);
	if(waserror()){
		/* don't leave the timer armed if the sleep errors out */
		timerdel(m->externup);
		nexterror();
	}
	sleep(r, tfn, arg);	/* tfn is also satisfied once the timer fires */
	if (m->externup->tt)
		timerdel(m->externup);
	m->externup->twhen = 0;
	poperror();
}
/*
 * Expects that only one process can call wakeup for any given Rendez.
 * We hold both locks to ensure that r->p and p->r remain consistent.
 * Richard Miller has a better solution that doesn't require both to
 * be held simultaneously, but I'm a paranoid - presotto.
 *
 * Returns the process that was woken, or nil if none was sleeping.
 */
Proc*
wakeup(Rendez *r)
{
	Mpl pl;
	Proc *p;

	pl = splhi();
	lock(r);
	p = r->_p;
	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r)
			panic("wakeup: state");
		/* detach both directions of the rendezvous link */
		r->_p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(r);
	splx(pl);
	return p;
}
/*
 * if waking a sleeping process, this routine must hold both
 * p->rlock and r->lock. However, it can't know them in
 * the same order as wakeup causing a possible lock ordering
 * deadlock. We break the deadlock by giving up the p->rlock
 * lock if we can't get the r->lock and retrying.
 *
 * Post note n with the given flag to p; returns 1 if the note was
 * queued, 0 if p's note buffer was full.  Kernel notes (flag !=
 * NUser) to an unnotifiable process discard any queued notes first.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	Mpl pl;
	int ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);
	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;
	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;
	/* NIX */
	if(p->state == Exotic){
		/* it could be that the process is not running
		 * in the AC when we interrupt the AC, but then
		 * we'd only get an extra interrupt in the AC, and
		 * nothing should happen.
		 */
		intrac(p);
	}
	if(dolock)
		qunlock(&p->debug);
	/* this loop is to avoid lock ordering problems. */
	for(;;){
		pl = splhi();
		lock(&p->rlock);
		r = p->r;
		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */
		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->_p != p)
				panic("postnote: state %d %d %d", r->_p != p, p->r != r, p->state);
			p->r = nil;
			r->_p = nil;
			ready(p);
			unlock(r);
			break;
		}
		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(pl);
		sched();
	}
	/* both breaks above leave p->rlock held */
	unlock(&p->rlock);
	splx(pl);
	if(p->state != Rendezvous){
		if(p->state == Semdown)
			ready(p);
		return ret;
	}
	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		p->rendval = ~0;	/* rendezvous fails with ~0 */
		/* unlink p from its rendezvous hash chain */
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);
	return ret;
}
/*
 * weird thing: keep at most NBROKEN around
 * (a holding pen for processes that died Broken, so a debugger
 * can inspect them before the slots are recycled)
 */
#define NBROKEN 4
struct
{
	QLock;			/* protects n and p[] */
	int n;			/* number of broken procs currently held */
	Proc *p[NBROKEN];	/* FIFO: p[0] is the oldest */
}broken;
  1266. void
  1267. addbroken(Proc *p)
  1268. {
  1269. Mach *m = machp();
  1270. qlock(&broken);
  1271. if(broken.n == NBROKEN) {
  1272. ready(broken.p[0]);
  1273. memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
  1274. --broken.n;
  1275. }
  1276. broken.p[broken.n++] = p;
  1277. qunlock(&broken);
  1278. stopac();
  1279. edfstop(m->externup);
  1280. p->state = Broken;
  1281. p->psstate = 0;
  1282. sched();
  1283. }
  1284. void
  1285. unbreak(Proc *p)
  1286. {
  1287. int b;
  1288. qlock(&broken);
  1289. for(b=0; b < broken.n; b++)
  1290. if(broken.p[b] == p) {
  1291. broken.n--;
  1292. memmove(&broken.p[b], &broken.p[b+1],
  1293. sizeof(Proc*)*(NBROKEN-(b+1)));
  1294. ready(p);
  1295. break;
  1296. }
  1297. qunlock(&broken);
  1298. }
  1299. int
  1300. freebroken(void)
  1301. {
  1302. int i, n;
  1303. qlock(&broken);
  1304. n = broken.n;
  1305. for(i=0; i<n; i++) {
  1306. ready(broken.p[i]);
  1307. broken.p[i] = 0;
  1308. }
  1309. broken.n = 0;
  1310. qunlock(&broken);
  1311. return n;
  1312. }
/*
 * Terminate the current process: release its resource groups, post
 * a wait record to the parent (unless it is a kernel process), free
 * or park its address space, and schedule away in state Moribund.
 * Never returns.
 */
void
pexit(char *exitstr, int freemem)
{
	Mach *m = machp();
	Proc *p;
	Segment **s, **es;
	int32_t utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;

	/* disabled NIX accounting dumps */
	if(0 && m->externup->nfullq > 0)
		iprint(" %s=%d", m->externup->text, m->externup->nfullq);
	if(0 && m->externup->nicc > 0)
		iprint(" [%s nicc %ud tctime %ulld actime %ulld]\n",
			m->externup->text, m->externup->nicc, m->externup->tctime, m->externup->actime);
	if(m->externup->syscalltrace != nil)
		free(m->externup->syscalltrace);
	m->externup->syscalltrace = nil;
	m->externup->alarm = 0;
	if (m->externup->tt)
		timerdel(m->externup);
	if(m->externup->trace)
		proctrace(m->externup, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&m->externup->debug);
	fgrp = m->externup->fgrp;
	m->externup->fgrp = nil;
	egrp = m->externup->egrp;
	m->externup->egrp = nil;
	rgrp = m->externup->rgrp;
	m->externup->rgrp = nil;
	pgrp = m->externup->pgrp;
	m->externup->pgrp = nil;
	dot = m->externup->dot;
	m->externup->dot = nil;
	qunlock(&m->externup->debug);

	/* now drop the references outside the lock */
	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 * if not a kernel process and have a parent,
	 * do some housekeeping.
	 */
	if(m->externup->kp == 0) {
		p = m->externup->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			//die("bootprocessdeath");
			panic("boot process died: %s", exitstr);
		}
		/* retry/ignore any error raised while allocating */
		while(waserror())
			;
		wq = smalloc(sizeof(Waitq));
		poperror();

		/* fill in the wait record for the parent */
		wq->w.pid = m->externup->pid;
		utime = m->externup->time[TUser] + m->externup->time[TCUser];
		stime = m->externup->time[TSys] + m->externup->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(sys->machptr[0]->ticks - m->externup->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %d: %s",
				m->externup->text, m->externup->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 * Check that parent is still alive.
		 */
		if(p->pid == m->externup->parentpid && p->state != Broken) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;
			/*
			 * If there would be more than 128 wait records
			 * processes for my parent, then don't leave a wait
			 * record behind. This helps prevent badly written
			 * daemon processes from accumulating lots of wait
			 * records.
			 */
			if(p->nwait < 128) {
				wq->next = p->waitq;
				p->waitq = wq;
				p->nwait++;
				wq = nil;	/* ownership passed to parent */
				wakeup(&p->waitr);
			}
		}
		unlock(&p->exl);
		if(wq)
			free(wq);
	}

	if(!freemem)
		addbroken(m->externup);	/* parks Broken; returns after unbreak */

	/* release the address space */
	qlock(&m->externup->seglock);
	es = &m->externup->seg[NSEG];
	for(s = m->externup->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&m->externup->seglock);

	lock(&m->externup->exl);		/* Prevent my children from leaving waits */
	psunhash(m->externup);
	m->externup->pid = 0;
	wakeup(&m->externup->waitr);
	unlock(&m->externup->exl);

	/* discard unconsumed wait records of my own children */
	for(f = m->externup->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&m->externup->debug);
	if(m->externup->pdbg) {
		wakeup(&m->externup->pdbg->sleep);
		m->externup->pdbg = 0;
	}
	qunlock(&m->externup->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&pga);

	stopac();
	edfstop(m->externup);
	m->externup->state = Moribund;
	sched();
	panic("pexit");
}
  1452. int
  1453. haswaitq(void *x)
  1454. {
  1455. Proc *p;
  1456. p = (Proc *)x;
  1457. return p->waitq != 0;
  1458. }
/*
 * Wait for a child to exit; fills *w (if non-nil) with its wait
 * record and returns the child's pid.  Raises Einuse if another
 * wait is already in progress and Enochild if there is nothing to
 * wait for.
 */
int
pwait(Waitmsg *w)
{
	Mach *m = machp();
	int cpid;
	Waitq *wq;

	/* only one outstanding wait per process */
	if(!canqlock(&m->externup->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&m->externup->qwaitr);
		nexterror();
	}

	lock(&m->externup->exl);
	if(m->externup->nchild == 0 && m->externup->waitq == 0) {
		unlock(&m->externup->exl);
		error(Enochild);
	}
	unlock(&m->externup->exl);

	/* block until a child posts a wait record */
	sleep(&m->externup->waitr, haswaitq, m->externup);

	/* dequeue the first record; haswaitq guarantees it exists */
	lock(&m->externup->exl);
	wq = m->externup->waitq;
	m->externup->waitq = wq->next;
	m->externup->nwait--;
	unlock(&m->externup->exl);

	qunlock(&m->externup->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);
	return cpid;
}
  1491. void
  1492. dumpaproc(Proc *p)
  1493. {
  1494. uintptr_t bss;
  1495. char *s;
  1496. if(p == 0)
  1497. return;
  1498. bss = 0;
  1499. if(p->seg[HSEG])
  1500. bss = p->seg[HSEG]->top;
  1501. else if(p->seg[BSEG])
  1502. bss = p->seg[BSEG]->top;
  1503. s = p->psstate;
  1504. if(s == 0)
  1505. s = statename[p->state];
  1506. print("%3d:%10s pc %#p dbgpc %#p %8s (%s) ut %ld st %ld bss %#p qpc %#p nl %d nd %lud lpc %#p pri %lud\n",
  1507. p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
  1508. p->time[0], p->time[1], bss, p->qpc, p->nlocks,
  1509. p->delaysched, p->lastlock ? p->lastlock->_pc : 0, p->priority);
  1510. }
  1511. void
  1512. procdump(void)
  1513. {
  1514. Mach *m = machp();
  1515. int i;
  1516. Proc *p;
  1517. if(m->externup)
  1518. print("up %d\n", m->externup->pid);
  1519. else
  1520. print("no current process\n");
  1521. for(i=0; (p = psincref(i)) != nil; i++) {
  1522. if(p->state != Dead)
  1523. dumpaproc(p);
  1524. psdecref(p);
  1525. }
  1526. }
/*
 * wait until all processes have flushed their mmu
 * state about segment s
 */
void
procflushseg(Segment *s)
{
	Mach *m = machp();
	int i, ns, nm, nwait;
	Proc *p;
	Mach *mp;

	/*
	 * tell all processes with this
	 * segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; (p = psincref(i)) != nil; i++) {
		if(p->state == Dead){
			psdecref(p);
			continue;
		}
		for(ns = 0; ns < NSEG; ns++){
			if(p->seg[ns] == s){
				p->newtlb = 1;
				/* if p is running right now, its core must flush too */
				for(nm = 0; nm < MACHMAX; nm++)
					if((mp = sys->machptr[nm]) != nil && mp->online)
						if(mp->proc == p){
							mp->mmuflush = 1;
							nwait++;
						}
				break;
			}
		}
		psdecref(p);
	}
	if(nwait == 0)
		return;

	/*
	 * wait for all processors to take a clock interrupt
	 * and flush their mmu's.
	 * NIX BUG: this won't work if another core is in AC mode.
	 * In that case we must IPI it, but only if that core is
	 * using this segment.
	 */
	for(i = 0; i < MACHMAX; i++)
		if((mp = sys->machptr[i]) != nil && mp->online)
			if(mp != m)
				while(mp->mmuflush)
					sched();
}
  1577. void
  1578. scheddump(void)
  1579. {
  1580. Mach *m = machp();
  1581. Proc *p;
  1582. Schedq *rq;
  1583. for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){
  1584. if(rq->head == 0)
  1585. continue;
  1586. print("run[%ld]:", rq-run.runq);
  1587. for(p = rq->head; p; p = p->rnext)
  1588. print(" %d(%lud)", p->pid, m->ticks - p->readytime);
  1589. print("\n");
  1590. delay(150);
  1591. }
  1592. print("nrdy %d\n", run.nrdy);
  1593. }
/*
 * Create and ready a kernel process named name that runs
 * func(arg).  The new proc inherits slash/dot, notes and note
 * handler from the current process, runs as eve at PriKproc, and
 * shares a single process group among all kprocs.
 */
void
kproc(char *name, void (*func)(void *), void *arg)
{
	Mach *m = machp();
	Proc *p;
	static Pgrp *kpgrp;	/* shared by every kernel process */

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;
	p->noswap = 1;
	/* inherit syscall context and namespace roots from the creator */
	p->scallnr = m->externup->scallnr;
	memmove(p->arg, m->externup->arg, sizeof(m->externup->arg));
	p->nerrlab = 0;
	p->slash = m->externup->slash;
	p->dot = m->externup->dot;
	if(p->dot)
		incref(p->dot);
	/* inherit pending notes and the note handler */
	memmove(p->note, m->externup->note, sizeof(p->note));
	p->nnote = m->externup->nnote;
	p->notified = 0;
	p->lastnote = m->externup->lastnote;
	p->notify = m->externup->notify;
	p->ureg = 0;
	p->dbgreg = 0;
	procpriority(p, PriKproc, 0);
	kprocchild(p, func, arg);
	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);
	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = sys->ticks;
	ready(p);
	/*
	 * since the bss/data segments are now shareable,
	 * any mmu info about this process is now stale
	 * and has to be discarded.
	 */
	p->newtlb = 1;
	mmuflush();
}
/*
 * called splhi() by notify(). See comment in notify for the
 * reasoning.
 *
 * Carry out the pending procctl action on p: forced exit, stop for
 * a debugger, or NIX core migration.
 */
void
procctl(Proc *p)
{
	Mach *m = machp();
	Mpl pl;
	char *state;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		/* only stop when there is a note to deliver */
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		pl = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();
		p->state = Stopped;
		sched();		/* stay Stopped until a debugger readies us */
		p->psstate = state;
		splx(pl);
		return;

	case Proc_toac:
		p->procctl = 0;
		/*
		 * This pretends to return from the system call,
		 * by moving to a core, but never returns (unless
		 * the process gets moved back to a TC.)
		 */
		spllo();
		runacore();
		return;

	case Proc_totc:
		p->procctl = 0;
		if(p != m->externup)
			panic("procctl: stopac: p != up");
		spllo();
		stopac();
		return;
	}
}
/*
 * Raise an error: record err as the current process's error string
 * and unwind to the most recent waserror().
 * NOTE(review): the setlabel on errlab[NERR-1] saves the raising
 * context into the topmost (otherwise unused) slot -- presumably
 * for post-mortem debugging; confirm before relying on it.
 */
void
error(char *err)
{
	Mach *m = machp();

	spllo();

	assert(m->externup->nerrlab < NERR);
	kstrcpy(m->externup->errstr, err, ERRMAX);
	setlabel(&m->externup->errlab[NERR-1]);
	nexterror();
}
/*
 * Pop the error stack and jump to the enclosing waserror() label.
 */
void
nexterror(void)
{
	Mach *m = machp();

	/*debug*/gotolabel(&m->externup->errlab[--m->externup->nerrlab]);
}
  1712. void
  1713. exhausted(char *resource)
  1714. {
  1715. char buf[ERRMAX];
  1716. sprint(buf, "no free %s", resource);
  1717. iprint("%s\n", buf);
  1718. error(buf);
  1719. }
/*
 * Kill the process with the largest address space (skipping kernel
 * procs, and protected procs owned by eve without write mode), to
 * recover memory; why is reported on the console.  Processes
 * sharing the victim's bss are killed too.
 */
void
killbig(char *why)
{
	int i, x;
	Segment *s;
	uint32_t l, max;
	Proc *p, *kp;

	/* pass 1: find the largest eligible process, keeping a ref on it */
	max = 0;
	kp = nil;
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		/* total size of all segments (seg[0] excluded) */
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			if(kp != nil)
				psdecref(kp);	/* drop the previous candidate */
			kp = p;
			max = l;
		}
		else
			psdecref(p);
	}
	if(kp == nil)
		return;

	print("%d: %s killed: %s\n", kp->pid, kp->text, why);

	/* pass 2: also kill processes sharing the victim's bss */
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
			p->procctl = Proc_exitbig;
		psdecref(p);
	}

	kp->procctl = Proc_exitbig;
	/* reclaim what we can from the victim's segments right away */
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		if(s != 0 && canqlock(&s->lk)) {
			mfreeseg(s, s->base, (s->top - s->base)/BIGPGSZ);
			qunlock(&s->lk);
		}
	}
	psdecref(kp);
}
  1771. /*
  1772. * change ownership to 'new' of all processes owned by 'old'. Used when
  1773. * eve changes.
  1774. */
  1775. void
  1776. renameuser(char *old, char *new)
  1777. {
  1778. int i;
  1779. Proc *p;
  1780. for(i = 0; (p = psincref(i)) != nil; i++){
  1781. if(p->user!=nil && strcmp(old, p->user)==0)
  1782. kstrdup(&p->user, new);
  1783. psdecref(p);
  1784. }
  1785. }
/*
 * time accounting called by clock() splhi'd
 * only cpu1 computes system load average
 * but the system load average is accounted for cpu0.
 */
void
accounttime(void)
{
	Mach *m = machp();
	Proc *p;
	uint32_t n, per;

	p = m->proc;
	if(p) {
		if(m->machno == 1)
			run.nrun++;
		p->time[p->insyscall]++;	/* charge this tick to user or sys time */
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - m->perf.last;
	m->perf.last = n;
	per = (m->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		m->perf.period = per;

	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
	m->perf.inidle = 0;

	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
	m->perf.inintr = 0;

	/* only one processor gets to compute system load averages.
	 * it has to be mach 1 when we use AMP.
	 */
	//if(sys->nmach > 1 && m->machno != 1)
	if(m->machno != 0) //Change to non-AMP
		return;
	/*
	 * calculate decaying load average.
	 * if we decay by (n-1)/n then it takes
	 * n clock ticks to go from load L to .36 L once
	 * things quiet down. it takes about 5 n clock
	 * ticks to go to zero. so using HZ means this is
	 * approximately the load over the last second,
	 * with a tail lasting about 5 seconds.
	 */
	n = run.nrun;
	run.nrun = 0;
	n = (run.nrdy+n)*1000;
	sys->load = (sys->load*(HZ-1)+n)/HZ;
}
  1834. void
  1835. halt(void)
  1836. {
  1837. if(run.nrdy != 0)
  1838. return;
  1839. hardhalt();
  1840. }