042-fq_codel-Fair-Queue-Codel-AQM.patch 23 KB

  1. From f8cf19c19528a468cc0b9846c0328a94cccdc605 Mon Sep 17 00:00:00 2001
  2. From: Eric Dumazet <edumazet@google.com>
  3. Date: Fri, 11 May 2012 09:30:50 +0000
  4. Subject: [PATCH] fq_codel: Fair Queue Codel AQM
  5. commit 4b549a2ef4bef9965d97cbd992ba67930cd3e0fe upstream.
  6. Fair Queue Codel packet scheduler
  7. Principles :
  8. - Packets are classified (internal classifier or external) on flows.
  9. - This is a Stochastic model (as we use a hash, several flows might
  10. be hashed on same slot)
  11. - Each flow has a CoDel managed queue.
  12. - Flows are linked onto two (Round Robin) lists,
  13. so that new flows have priority on old ones.
  14. - For a given flow, packets are not reordered (CoDel uses a FIFO)
  15. - head drops only.
  16. - ECN capability is on by default.
  17. - Very low memory footprint (64 bytes per flow)
  18. tc qdisc ... fq_codel [ limit PACKETS ] [ flows number ]
  19. [ target TIME ] [ interval TIME ] [ noecn ]
  20. [ quantum BYTES ]
  21. defaults : 1024 flows, 10240 packets limit, quantum : device MTU
  22. target : 5ms (CoDel default)
  23. interval : 100ms (CoDel default)
  24. Impressive results on load :
  25. class htb 1:1 root leaf 10: prio 0 quantum 1514 rate 200000Kbit ceil 200000Kbit burst 1475b/8 mpu 0b overhead 0b cburst 1475b/8 mpu 0b overhead 0b level 0
  26. Sent 43304920109 bytes 33063109 pkt (dropped 0, overlimits 0 requeues 0)
  27. rate 201691Kbit 28595pps backlog 0b 312p requeues 0
  28. lended: 33063109 borrowed: 0 giants: 0
  29. tokens: -912 ctokens: -912
  30. class fq_codel 10:1735 parent 10:
  31. (dropped 1292, overlimits 0 requeues 0)
  32. backlog 15140b 10p requeues 0
  33. deficit 1514 count 1 lastcount 1 ldelay 7.1ms
  34. class fq_codel 10:4524 parent 10:
  35. (dropped 1291, overlimits 0 requeues 0)
  36. backlog 16654b 11p requeues 0
  37. deficit 1514 count 1 lastcount 1 ldelay 7.1ms
  38. class fq_codel 10:4e74 parent 10:
  39. (dropped 1290, overlimits 0 requeues 0)
  40. backlog 6056b 4p requeues 0
  41. deficit 1514 count 1 lastcount 1 ldelay 6.4ms dropping drop_next 92.0ms
  42. class fq_codel 10:628a parent 10:
  43. (dropped 1289, overlimits 0 requeues 0)
  44. backlog 7570b 5p requeues 0
  45. deficit 1514 count 1 lastcount 1 ldelay 5.4ms dropping drop_next 90.9ms
  46. class fq_codel 10:a4b3 parent 10:
  47. (dropped 302, overlimits 0 requeues 0)
  48. backlog 16654b 11p requeues 0
  49. deficit 1514 count 1 lastcount 1 ldelay 7.1ms
  50. class fq_codel 10:c3c2 parent 10:
  51. (dropped 1284, overlimits 0 requeues 0)
  52. backlog 13626b 9p requeues 0
  53. deficit 1514 count 1 lastcount 1 ldelay 5.9ms
  54. class fq_codel 10:d331 parent 10:
  55. (dropped 299, overlimits 0 requeues 0)
  56. backlog 15140b 10p requeues 0
  57. deficit 1514 count 1 lastcount 1 ldelay 7.0ms
  58. class fq_codel 10:d526 parent 10:
  59. (dropped 12160, overlimits 0 requeues 0)
  60. backlog 35870b 211p requeues 0
  61. deficit 1508 count 12160 lastcount 1 ldelay 15.3ms dropping drop_next 247us
  62. class fq_codel 10:e2c6 parent 10:
  63. (dropped 1288, overlimits 0 requeues 0)
  64. backlog 15140b 10p requeues 0
  65. deficit 1514 count 1 lastcount 1 ldelay 7.1ms
  66. class fq_codel 10:eab5 parent 10:
  67. (dropped 1285, overlimits 0 requeues 0)
  68. backlog 16654b 11p requeues 0
  69. deficit 1514 count 1 lastcount 1 ldelay 5.9ms
  70. class fq_codel 10:f220 parent 10:
  71. (dropped 1289, overlimits 0 requeues 0)
  72. backlog 15140b 10p requeues 0
  73. deficit 1514 count 1 lastcount 1 ldelay 7.1ms
  74. qdisc htb 1: root refcnt 6 r2q 10 default 1 direct_packets_stat 0 ver 3.17
  75. Sent 43331086547 bytes 33092812 pkt (dropped 0, overlimits 66063544 requeues 71)
  76. rate 201697Kbit 28602pps backlog 0b 260p requeues 71
  77. qdisc fq_codel 10: parent 1:1 limit 10240p flows 65536 target 5.0ms interval 100.0ms ecn
  78. Sent 43331086547 bytes 33092812 pkt (dropped 949359, overlimits 0 requeues 0)
  79. rate 201697Kbit 28602pps backlog 189352b 260p requeues 0
  80. maxpacket 1514 drop_overlimit 0 new_flow_count 5582 ecn_mark 125593
  81. new_flows_len 0 old_flows_len 11
  82. PING 172.30.42.18 (172.30.42.18) 56(84) bytes of data.
  83. 64 bytes from 172.30.42.18: icmp_req=1 ttl=64 time=0.227 ms
  84. 64 bytes from 172.30.42.18: icmp_req=2 ttl=64 time=0.165 ms
  85. 64 bytes from 172.30.42.18: icmp_req=3 ttl=64 time=0.166 ms
  86. 64 bytes from 172.30.42.18: icmp_req=4 ttl=64 time=0.151 ms
  87. 64 bytes from 172.30.42.18: icmp_req=5 ttl=64 time=0.164 ms
  88. 64 bytes from 172.30.42.18: icmp_req=6 ttl=64 time=0.172 ms
  89. 64 bytes from 172.30.42.18: icmp_req=7 ttl=64 time=0.175 ms
  90. 64 bytes from 172.30.42.18: icmp_req=8 ttl=64 time=0.183 ms
  91. 64 bytes from 172.30.42.18: icmp_req=9 ttl=64 time=0.158 ms
  92. 64 bytes from 172.30.42.18: icmp_req=10 ttl=64 time=0.200 ms
  93. 10 packets transmitted, 10 received, 0% packet loss, time 8999ms
  94. rtt min/avg/max/mdev = 0.151/0.176/0.227/0.022 ms
  95. Much better than SFQ because of priority given to new flows, and fast
  96. path dirtying less cache lines.
  97. Signed-off-by: Eric Dumazet <edumazet@google.com>
  98. Signed-off-by: David S. Miller <davem@davemloft.net>
  99. ---
  100. include/linux/pkt_sched.h | 54 ++++
  101. net/sched/Kconfig | 11 +
  102. net/sched/Makefile | 1 +
  103. net/sched/sch_fq_codel.c | 624 +++++++++++++++++++++++++++++++++++++++++++++
  104. 4 files changed, 690 insertions(+)
  105. create mode 100644 net/sched/sch_fq_codel.c
  106. --- a/include/linux/pkt_sched.h
  107. +++ b/include/linux/pkt_sched.h
  108. @@ -659,4 +659,58 @@ struct tc_codel_xstats {
  109. __u32 dropping; /* are we in dropping state ? */
  110. };
/* FQ_CODEL */

/* Netlink attributes accepted in the TCA_OPTIONS nest of fq_codel. */
enum {
	TCA_FQ_CODEL_UNSPEC,
	TCA_FQ_CODEL_TARGET,	/* codel target delay, in usec */
	TCA_FQ_CODEL_LIMIT,	/* hard packet limit for the whole qdisc */
	TCA_FQ_CODEL_INTERVAL,	/* codel estimation window, in usec */
	TCA_FQ_CODEL_ECN,	/* boolean: ECN-mark instead of dropping */
	TCA_FQ_CODEL_FLOWS,	/* number of flow queues (hash buckets) */
	TCA_FQ_CODEL_QUANTUM,	/* DRR quantum, in bytes */
	__TCA_FQ_CODEL_MAX
};

#define TCA_FQ_CODEL_MAX	(__TCA_FQ_CODEL_MAX - 1)

/* discriminator for the union inside struct tc_fq_codel_xstats */
enum {
	TCA_FQ_CODEL_XSTATS_QDISC,
	TCA_FQ_CODEL_XSTATS_CLASS,
};

/* qdisc-level extended statistics */
struct tc_fq_codel_qd_stats {
	__u32	maxpacket;	/* largest packet we've seen so far */
	__u32	drop_overlimit;	/* number of time max qdisc
				 * packet limit was hit
				 */
	__u32	ecn_mark;	/* number of packets we ECN marked
				 * instead of being dropped
				 */
	__u32	new_flow_count;	/* number of time packets
				 * created a 'new flow'
				 */
	__u32	new_flows_len;	/* count of flows in new list */
	__u32	old_flows_len;	/* count of flows in old list */
};

/* per-flow ("class") extended statistics */
struct tc_fq_codel_cl_stats {
	__s32	deficit;	/* remaining DRR byte credit */
	__u32	ldelay;		/* in-queue delay seen by most recently
				 * dequeued packet
				 */
	__u32	count;		/* codel drop count in current cycle */
	__u32	lastcount;	/* codel drop count at last cycle start */
	__u32	dropping;	/* boolean: codel in dropping state */
	__s32	drop_next;	/* signed usec until next scheduled drop */
};

/* dumped by TCA_FQ_CODEL_XSTATS_* depending on 'type' */
struct tc_fq_codel_xstats {
	__u32	type;
	union {
		struct tc_fq_codel_qd_stats qdisc_stats;
		struct tc_fq_codel_cl_stats class_stats;
	};
};
  165. #endif
  166. --- a/net/sched/Kconfig
  167. +++ b/net/sched/Kconfig
  168. @@ -261,6 +261,17 @@ config NET_SCH_CODEL
  169. If unsure, say N.
  170. +config NET_SCH_FQ_CODEL
  171. + tristate "Fair Queue Controlled Delay AQM (FQ_CODEL)"
  172. + help
  173. + Say Y here if you want to use the FQ Controlled Delay (FQ_CODEL)
  174. + packet scheduling algorithm.
  175. +
  176. + To compile this driver as a module, choose M here: the module
  177. + will be called sch_fq_codel.
  178. +
  179. + If unsure, say N.
  180. +
  181. config NET_SCH_INGRESS
  182. tristate "Ingress Qdisc"
  183. depends on NET_CLS_ACT
  184. --- a/net/sched/Makefile
  185. +++ b/net/sched/Makefile
  186. @@ -37,6 +37,7 @@ obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqpr
  187. obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
  188. obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
  189. obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
  190. +obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
  191. obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
  192. obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
  193. --- /dev/null
  194. +++ b/net/sched/sch_fq_codel.c
  195. @@ -0,0 +1,624 @@
  196. +/*
  197. + * Fair Queue CoDel discipline
  198. + *
  199. + * This program is free software; you can redistribute it and/or
  200. + * modify it under the terms of the GNU General Public License
  201. + * as published by the Free Software Foundation; either version
  202. + * 2 of the License, or (at your option) any later version.
  203. + *
  204. + * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
  205. + */
  206. +
  207. +#include <linux/module.h>
  208. +#include <linux/types.h>
  209. +#include <linux/kernel.h>
  210. +#include <linux/jiffies.h>
  211. +#include <linux/string.h>
  212. +#include <linux/in.h>
  213. +#include <linux/errno.h>
  214. +#include <linux/init.h>
  215. +#include <linux/skbuff.h>
  216. +#include <linux/jhash.h>
  217. +#include <linux/slab.h>
  218. +#include <linux/vmalloc.h>
  219. +#include <net/netlink.h>
  220. +#include <net/pkt_sched.h>
  221. +#include <net/flow_keys.h>
  222. +#include <net/codel.h>
  223. +
  224. +/* Fair Queue CoDel.
  225. + *
  226. + * Principles :
  227. + * Packets are classified (internal classifier or external) on flows.
  228. + * This is a Stochastic model (as we use a hash, several flows
  229. + * might be hashed on same slot)
  230. + * Each flow has a CoDel managed queue.
  231. + * Flows are linked onto two (Round Robin) lists,
  232. + * so that new flows have priority on old ones.
  233. + *
  234. + * For a given flow, packets are not reordered (CoDel uses a FIFO)
  235. + * head drops only.
  236. + * ECN capability is on by default.
  237. + * Low memory footprint (64 bytes per flow)
  238. + */
  239. +
/* Per-flow state: an skb FIFO plus CoDel state and DRR accounting. */
struct fq_codel_flow {
	struct sk_buff	  *head;	/* oldest queued packet */
	struct sk_buff	  *tail;	/* newest queued packet */
	struct list_head  flowchain;	/* link on new_flows or old_flows */
	int		  deficit;	/* DRR byte credit */
	u32		  dropped;	/* number of drops (or ECN marks) on this flow */
	struct codel_vars cvars;	/* per-flow CoDel state machine */
}; /* please try to keep this structure <= 64 bytes */
  248. +
/* Qdisc private data. */
struct fq_codel_sched_data {
	struct tcf_proto *filter_list;	/* optional external classifier */
	struct fq_codel_flow *flows;	/* Flows table [flows_cnt] */
	u32		*backlogs;	/* backlog table [flows_cnt], in bytes */
	u32		flows_cnt;	/* number of flows */
	u32		perturbation;	/* hash perturbation */
	u32		quantum;	/* psched_mtu(qdisc_dev(sch)); */
	struct codel_params cparams;	/* CoDel parameters shared by all flows */
	struct codel_stats cstats;	/* CoDel stats shared by all flows */
	u32		drop_overlimit;	/* drops because the qdisc limit was hit */
	u32		new_flow_count;	/* times an idle flow (re)entered new_flows */

	struct list_head new_flows;	/* list of new flows */
	struct list_head old_flows;	/* list of old flows */
};
  264. +
/* Hash the packet's 5-tuple (perturbed) into a flow index [0, flows_cnt). */
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
				  const struct sk_buff *skb)
{
	struct flow_keys keys;
	unsigned int hash;

	skb_flow_dissect(skb, &keys);
	hash = jhash_3words((__force u32)keys.dst,
			    (__force u32)keys.src ^ keys.ip_proto,
			    (__force u32)keys.ports, q->perturbation);
	/* scale hash into [0, flows_cnt) without a modulus */
	return ((u64)hash * q->flows_cnt) >> 32;
}
  277. +
/* Select the flow for @skb.
 * Returns flow index + 1 (so callers can use 0 as "drop/error"):
 * skb->priority may select a flow directly, then the optional external
 * classifier is consulted, then the internal hash is used.
 */
static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= q->flows_cnt)
		return TC_H_MIN(skb->priority);

	if (!q->filter_list)
		return fq_codel_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= q->flows_cnt)
			return TC_H_MIN(res.classid);
	}
	return 0;	/* 0 means: drop this packet */
}
  310. +
  311. +/* helper functions : might be changed when/if skb use a standard list_head */
  312. +
/* remove one skb from head of slot queue */
/* Caller must ensure flow->head is non-NULL. */
static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow)
{
	struct sk_buff *skb = flow->head;

	flow->head = skb->next;
	skb->next = NULL;	/* detach from the FIFO chain */
	return skb;
}
  322. +
/* add skb to flow queue (tail add) */
static inline void flow_queue_add(struct fq_codel_flow *flow,
				  struct sk_buff *skb)
{
	if (flow->head == NULL)		/* FIFO was empty */
		flow->head = skb;
	else
		flow->tail->next = skb;
	flow->tail = skb;
	skb->next = NULL;		/* new tail terminates the chain */
}
  334. +
/* Drop one packet from the head of the fattest flow.
 * Returns the index of the flow that lost a packet, so the caller can
 * tell whether its own flow was the victim.
 */
static unsigned int fq_codel_drop(struct Qdisc *sch)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned int maxbacklog = 0, idx = 0, i, len;
	struct fq_codel_flow *flow;

	/* Queue is full! Find the fat flow and drop packet from it.
	 * This might sound expensive, but with 1024 flows, we scan
	 * 4KB of memory, and we dont need to handle a complex tree
	 * in fast path (packet queue/enqueue) with many cache misses.
	 */
	for (i = 0; i < q->flows_cnt; i++) {
		if (q->backlogs[i] > maxbacklog) {
			maxbacklog = q->backlogs[i];
			idx = i;
		}
	}
	flow = &q->flows[idx];
	skb = dequeue_head(flow);
	len = qdisc_pkt_len(skb);
	/* keep per-flow and qdisc-wide accounting in sync */
	q->backlogs[idx] -= len;
	kfree_skb(skb);
	sch->q.qlen--;
	sch->qstats.drops++;
	sch->qstats.backlog -= len;
	flow->dropped++;
	return idx;
}
  364. +
  365. +static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  366. +{
  367. + struct fq_codel_sched_data *q = qdisc_priv(sch);
  368. + unsigned int idx;
  369. + struct fq_codel_flow *flow;
  370. + int uninitialized_var(ret);
  371. +
  372. + idx = fq_codel_classify(skb, sch, &ret);
  373. + if (idx == 0) {
  374. + if (ret & __NET_XMIT_BYPASS)
  375. + sch->qstats.drops++;
  376. + kfree_skb(skb);
  377. + return ret;
  378. + }
  379. + idx--;
  380. +
  381. + codel_set_enqueue_time(skb);
  382. + flow = &q->flows[idx];
  383. + flow_queue_add(flow, skb);
  384. + q->backlogs[idx] += qdisc_pkt_len(skb);
  385. + sch->qstats.backlog += qdisc_pkt_len(skb);
  386. +
  387. + if (list_empty(&flow->flowchain)) {
  388. + list_add_tail(&flow->flowchain, &q->new_flows);
  389. + codel_vars_init(&flow->cvars);
  390. + q->new_flow_count++;
  391. + flow->deficit = q->quantum;
  392. + flow->dropped = 0;
  393. + }
  394. + if (++sch->q.qlen < sch->limit)
  395. + return NET_XMIT_SUCCESS;
  396. +
  397. + q->drop_overlimit++;
  398. + /* Return Congestion Notification only if we dropped a packet
  399. + * from this flow.
  400. + */
  401. + if (fq_codel_drop(sch) == idx)
  402. + return NET_XMIT_CN;
  403. +
  404. + /* As we dropped a packet, better let upper stack know this */
  405. + qdisc_tree_decrease_qlen(sch, 1);
  406. + return NET_XMIT_SUCCESS;
  407. +}
  408. +
/* This is the specific function called from codel_dequeue()
 * to dequeue a packet from queue. Note: backlog is handled in
 * codel, we dont need to reduce it here.
 */
static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
{
	struct fq_codel_flow *flow;
	struct sk_buff *skb = NULL;

	/* recover the owning flow from its embedded codel_vars */
	flow = container_of(vars, struct fq_codel_flow, cvars);
	if (flow->head) {
		skb = dequeue_head(flow);
		sch->qstats.backlog -= qdisc_pkt_len(skb);
		sch->q.qlen--;
	}
	return skb;	/* NULL when the flow FIFO is empty */
}
  426. +
/* Dequeue entry point: DRR over new_flows then old_flows, with CoDel
 * applied per flow.  Returns the next packet to send, or NULL when the
 * qdisc is empty.
 */
static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	struct fq_codel_flow *flow;
	struct list_head *head;
	u32 prev_drop_count, prev_ecn_mark;

begin:
	/* new flows are served before old ones */
	head = &q->new_flows;
	if (list_empty(head)) {
		head = &q->old_flows;
		if (list_empty(head))
			return NULL;
	}
	flow = list_first_entry(head, struct fq_codel_flow, flowchain);

	if (flow->deficit <= 0) {
		/* quantum exhausted: refill and demote to old_flows */
		flow->deficit += q->quantum;
		list_move_tail(&flow->flowchain, &q->old_flows);
		goto begin;
	}

	prev_drop_count = q->cstats.drop_count;
	prev_ecn_mark = q->cstats.ecn_mark;

	skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
			    dequeue, &q->backlogs[flow - q->flows]);

	/* charge this flow for whatever codel dropped or ECN-marked */
	flow->dropped += q->cstats.drop_count - prev_drop_count;
	flow->dropped += q->cstats.ecn_mark - prev_ecn_mark;

	if (!skb) {
		/* force a pass through old_flows to prevent starvation */
		if ((head == &q->new_flows) && !list_empty(&q->old_flows))
			list_move_tail(&flow->flowchain, &q->old_flows);
		else
			list_del_init(&flow->flowchain);
		goto begin;
	}
	qdisc_bstats_update(sch, skb);
	flow->deficit -= qdisc_pkt_len(skb);
	/* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
	 * or HTB crashes. Defer it for next round.
	 */
	if (q->cstats.drop_count && sch->q.qlen) {
		qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
		q->cstats.drop_count = 0;
	}
	return skb;
}
  478. +
/* Empty the qdisc by repeatedly dequeueing and freeing packets.
 * NOTE(review): this runs the full dequeue path (CoDel state machine,
 * bstats updates) for every queued packet — confirm that is intended
 * rather than a plain purge of the flow FIFOs.
 */
static void fq_codel_reset(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = fq_codel_dequeue(sch)) != NULL)
		kfree_skb(skb);
}
  486. +
/* Netlink validation policy: every fq_codel option is a u32. */
static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
	[TCA_FQ_CODEL_TARGET]	= { .type = NLA_U32 },
	[TCA_FQ_CODEL_LIMIT]	= { .type = NLA_U32 },
	[TCA_FQ_CODEL_INTERVAL]	= { .type = NLA_U32 },
	[TCA_FQ_CODEL_ECN]	= { .type = NLA_U32 },
	[TCA_FQ_CODEL_FLOWS]	= { .type = NLA_U32 },
	[TCA_FQ_CODEL_QUANTUM]	= { .type = NLA_U32 },
};
  495. +
/* Apply netlink configuration (also used by init).  Runs under RTNL;
 * takes the tree lock while mutating live state.  Returns 0 or -errno.
 */
static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy);
	if (err < 0)
		return err;
	/* flow count is only changeable before the tables are allocated */
	if (tb[TCA_FQ_CODEL_FLOWS]) {
		if (q->flows)
			return -EINVAL;
		q->flows_cnt = nla_get_u32(tb[TCA_FQ_CODEL_FLOWS]);
		if (!q->flows_cnt ||
		    q->flows_cnt > 65536)
			return -EINVAL;
	}
	sch_tree_lock(sch);

	if (tb[TCA_FQ_CODEL_TARGET]) {
		u64 target = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]);

		/* convert usec from userspace into codel time units */
		q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
	}

	if (tb[TCA_FQ_CODEL_INTERVAL]) {
		u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);

		q->cparams.interval = (interval * NSEC_PER_USEC) >> CODEL_SHIFT;
	}

	if (tb[TCA_FQ_CODEL_LIMIT])
		sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]);

	if (tb[TCA_FQ_CODEL_ECN])
		q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);

	if (tb[TCA_FQ_CODEL_QUANTUM])
		q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));

	/* shrink queue if the new limit is below the current backlog */
	while (sch->q.qlen > sch->limit) {
		struct sk_buff *skb = fq_codel_dequeue(sch);

		kfree_skb(skb);
		q->cstats.drop_count++;
	}
	qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
	q->cstats.drop_count = 0;

	sch_tree_unlock(sch);
	return 0;
}
  551. +
  552. +static void *fq_codel_zalloc(size_t sz)
  553. +{
  554. + void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
  555. +
  556. + if (!ptr)
  557. + ptr = vzalloc(sz);
  558. + return ptr;
  559. +}
  560. +
  561. +static void fq_codel_free(void *addr)
  562. +{
  563. + if (addr) {
  564. + if (is_vmalloc_addr(addr))
  565. + vfree(addr);
  566. + else
  567. + kfree(addr);
  568. + }
  569. +}
  570. +
/* Teardown: release the classifier chain and the flow/backlog tables.
 * The qdisc structure itself is freed by the core.
 */
static void fq_codel_destroy(struct Qdisc *sch)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	fq_codel_free(q->backlogs);
	fq_codel_free(q->flows);
}
  579. +
  580. +static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
  581. +{
  582. + struct fq_codel_sched_data *q = qdisc_priv(sch);
  583. + int i;
  584. +
  585. + sch->limit = 10*1024;
  586. + q->flows_cnt = 1024;
  587. + q->quantum = psched_mtu(qdisc_dev(sch));
  588. + q->perturbation = net_random();
  589. + INIT_LIST_HEAD(&q->new_flows);
  590. + INIT_LIST_HEAD(&q->old_flows);
  591. + codel_params_init(&q->cparams);
  592. + codel_stats_init(&q->cstats);
  593. + q->cparams.ecn = true;
  594. +
  595. + if (opt) {
  596. + int err = fq_codel_change(sch, opt);
  597. + if (err)
  598. + return err;
  599. + }
  600. +
  601. + if (!q->flows) {
  602. + q->flows = fq_codel_zalloc(q->flows_cnt *
  603. + sizeof(struct fq_codel_flow));
  604. + if (!q->flows)
  605. + return -ENOMEM;
  606. + q->backlogs = fq_codel_zalloc(q->flows_cnt * sizeof(u32));
  607. + if (!q->backlogs) {
  608. + fq_codel_free(q->flows);
  609. + return -ENOMEM;
  610. + }
  611. + for (i = 0; i < q->flows_cnt; i++) {
  612. + struct fq_codel_flow *flow = q->flows + i;
  613. +
  614. + INIT_LIST_HEAD(&flow->flowchain);
  615. + }
  616. + }
  617. + if (sch->limit >= 1)
  618. + sch->flags |= TCQ_F_CAN_BYPASS;
  619. + else
  620. + sch->flags &= ~TCQ_F_CAN_BYPASS;
  621. + return 0;
  622. +}
  623. +
/* Dump current configuration into a netlink TCA_OPTIONS nest.
 * Times are exported in usec, the unit the user supplied them in.
 */
static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
			codel_time_to_us(q->cparams.target)) ||
	    nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
			sch->limit) ||
	    nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
			codel_time_to_us(q->cparams.interval)) ||
	    nla_put_u32(skb, TCA_FQ_CODEL_ECN,
			q->cparams.ecn) ||
	    nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
			q->quantum) ||
	    nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
			q->flows_cnt))
		goto nla_put_failure;

	nla_nest_end(skb, opts);
	return skb->len;

nla_put_failure:
	return -1;	/* message buffer full */
}
  653. +
/* Dump qdisc-level extended stats (TCA_FQ_CODEL_XSTATS_QDISC). */
static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct tc_fq_codel_xstats st = {
		.type				= TCA_FQ_CODEL_XSTATS_QDISC,
		.qdisc_stats.maxpacket		= q->cstats.maxpacket,
		.qdisc_stats.drop_overlimit	= q->drop_overlimit,
		.qdisc_stats.ecn_mark		= q->cstats.ecn_mark,
		.qdisc_stats.new_flow_count	= q->new_flow_count,
	};
	struct list_head *pos;

	/* list lengths are not tracked; count them at dump time */
	list_for_each(pos, &q->new_flows)
		st.qdisc_stats.new_flows_len++;

	list_for_each(pos, &q->old_flows)
		st.qdisc_stats.old_flows_len++;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}
  674. +
/* fq_codel "classes" are the flow indexes; there are no child qdiscs. */
static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg)
{
	return NULL;
}

/* Classes need no refcounting; 0 means "not found" here, flows are
 * addressed as index + 1 elsewhere.
 */
static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid)
{
	return 0;
}

static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
				   u32 classid)
{
	/* we cannot bypass queue discipline anymore */
	sch->flags &= ~TCQ_F_CAN_BYPASS;
	return 0;
}

/* Nothing to release: get/bind hand out no references. */
static void fq_codel_put(struct Qdisc *q, unsigned long cl)
{
}

/* Expose the single, qdisc-level filter chain (cl == 0 only). */
static struct tcf_proto **fq_codel_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);

	if (cl)
		return NULL;
	return &q->filter_list;
}

/* Dump a class handle: minor id is the flow index + 1. */
static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl,
			       struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}
  712. +
/* Dump per-flow queue stats and CoDel xstats for "class" @cl
 * (flow index + 1).  Out-of-range ids still get zeroed queue stats.
 */
static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				     struct gnet_dump *d)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	u32 idx = cl - 1;
	struct gnet_stats_queue qs = { 0 };
	struct tc_fq_codel_xstats xstats;

	if (idx < q->flows_cnt) {
		const struct fq_codel_flow *flow = &q->flows[idx];
		const struct sk_buff *skb = flow->head;

		memset(&xstats, 0, sizeof(xstats));
		xstats.type = TCA_FQ_CODEL_XSTATS_CLASS;
		xstats.class_stats.deficit = flow->deficit;
		xstats.class_stats.ldelay =
			codel_time_to_us(flow->cvars.ldelay);
		xstats.class_stats.count = flow->cvars.count;
		xstats.class_stats.lastcount = flow->cvars.lastcount;
		xstats.class_stats.dropping = flow->cvars.dropping;
		if (flow->cvars.dropping) {
			/* signed time until (negative: since) next drop */
			codel_tdiff_t delta = flow->cvars.drop_next -
					      codel_get_time();

			xstats.class_stats.drop_next = (delta >= 0) ?
				codel_time_to_us(delta) :
				-codel_time_to_us(-delta);
		}
		/* qlen is not tracked per flow; walk the FIFO */
		while (skb) {
			qs.qlen++;
			skb = skb->next;
		}
		qs.backlog = q->backlogs[idx];
		qs.drops = flow->dropped;
	}
	if (gnet_stats_copy_queue(d, &qs) < 0)
		return -1;
	if (idx < q->flows_cnt)
		return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
	return 0;
}
  754. +
/* Iterate over active flows (those linked on a RR list), invoking
 * arg->fn with the class id (flow index + 1) for each.
 */
static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->flows_cnt; i++) {
		/* skip idle flows and honour arg->skip */
		if (list_empty(&q->flows[i].flowchain) ||
		    arg->count < arg->skip) {
			arg->count++;
			continue;
		}
		if (arg->fn(sch, i + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}
  776. +
/* Class operations: flows are exposed read-only as pseudo-classes. */
static const struct Qdisc_class_ops fq_codel_class_ops = {
	.leaf		=	fq_codel_leaf,
	.get		=	fq_codel_get,
	.put		=	fq_codel_put,
	.tcf_chain	=	fq_codel_find_tcf,
	.bind_tcf	=	fq_codel_bind,
	.unbind_tcf	=	fq_codel_put,
	.dump		=	fq_codel_dump_class,
	.dump_stats	=	fq_codel_dump_class_stats,
	.walk		=	fq_codel_walk,
};

/* Qdisc operations registered with the traffic-control core. */
static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
	.cl_ops		=	&fq_codel_class_ops,
	.id		=	"fq_codel",
	.priv_size	=	sizeof(struct fq_codel_sched_data),
	.enqueue	=	fq_codel_enqueue,
	.dequeue	=	fq_codel_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	fq_codel_drop,
	.init		=	fq_codel_init,
	.reset		=	fq_codel_reset,
	.destroy	=	fq_codel_destroy,
	.change		=	fq_codel_change,
	.dump		=	fq_codel_dump,
	.dump_stats	=	fq_codel_dump_stats,
	.owner		=	THIS_MODULE,
};
  805. +
/* Module plumbing: register/unregister the qdisc with the TC core. */
static int __init fq_codel_module_init(void)
{
	return register_qdisc(&fq_codel_qdisc_ops);
}

static void __exit fq_codel_module_exit(void)
{
	unregister_qdisc(&fq_codel_qdisc_ops);
}

module_init(fq_codel_module_init)
module_exit(fq_codel_module_exit)
MODULE_AUTHOR("Eric Dumazet");
MODULE_LICENSE("GPL");