From 49dab7a96af52de9c5dfae554132b681d92a018d Mon Sep 17 00:00:00 2001
From: Krzysztof Mazur
Date: Mon, 23 Jan 2012 00:27:57 +0100
Subject: [PATCH] SOC queueing discipline

SOC is an SFQ qdisc with a changed hash function: SOC uses only the
source or the destination address for hashing. The hashf parameter
selects the key: 0 hashes on the source address, any other value on
the destination address.

Signed-off-by: Krzysztof Mazur
---
Note: in this copy of the patch the header names of the 14 #include
directives in net/sched/sch_soc.c and the e-mail addresses are missing;
they have to be restored before the patch can be built.

 include/linux/pkt_sched.h |   18 ++-
 net/sched/Kconfig         |    8 +
 net/sched/Makefile        |    1 +
 net/sched/sch_soc.c       |  643 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 669 insertions(+), 1 deletions(-)
 create mode 100644 net/sched/sch_soc.c

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index c533670..ec1d5e6 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -166,8 +166,24 @@ struct tc_sfq_xstats {
 	__s32		allot;
 };
 
+/* SOC section */
+
+struct tc_soc_qopt {
+	unsigned	quantum;	/* Bytes per round allocated to flow */
+	int		hashf;		/* Hash on src (0) or dst (!0) addr */
+	__u32		limit;		/* Maximal packets in queue */
+	unsigned	divisor;	/* Hash divisor  */
+	unsigned	flows;		/* Maximal number of flows  */
+};
+
+struct tc_soc_xstats
+{
+	__s32		allot;
+};
+
 /*
- *  NOTE: limit, divisor and flows are hardwired to code at the moment.
+ *  NOTE (SOC & SFQ): limit, divisor and flows are hardwired to code at the
+ *  moment.
  *
  *	limit=flows=128, divisor=1024;
  *
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2590e91..1a0f10d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -148,6 +148,14 @@ config NET_SCH_SFQ
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_sfq.
 
+config NET_SCH_SOC
+	tristate "SOC queue"
+	depends on NET_SCHED
+	---help---
+	  Socialistic Queue is an SFQ-based queue hashing only on the source
+	  (hashf=0) or destination (hashf!=0) address. It can be used by ISPs
+	  together with other queues (such as prio, htb, cbq, ...).
+
 config NET_SCH_TEQL
 	tristate "True Link Equalizer (TEQL)"
 	---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index dc5889c..aee0d86 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_NET_SCH_INGRESS)	+= sch_ingress.o
 obj-$(CONFIG_NET_SCH_DSMARK)	+= sch_dsmark.o
 obj-$(CONFIG_NET_SCH_SFB)	+= sch_sfb.o
 obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
+obj-$(CONFIG_NET_SCH_SOC)	+= sch_soc.o
 obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
 obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
 obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
diff --git a/net/sched/sch_soc.c b/net/sched/sch_soc.c
new file mode 100644
index 0000000..7545d37
--- /dev/null
+++ b/net/sched/sch_soc.c
@@ -0,0 +1,643 @@
+/*
+ * net/sched/sch_soc.c	Socialistic Queueing discipline.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Krzysztof Mazur
+ *		based on SFQ by Alexey Kuznetsov,
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+/*	Socialistic Queueing algorithm.
+	=======================================
+
+	Simple round-robin per-IP scheduling.
+
+	ADVANTAGE:
+
+	- It is very cheap. Both CPU and memory requirements are minimal.
+
+	DRAWBACKS:
+
+	- "Stochastic" -> It is not 100% fair.
+	When hash collisions occur, several IPs are considered as one.
+
+	- "Round-robin" -> It introduces larger delays than virtual clock
+	based schemes, and should not be used for isolating interactive
+	traffic from non-interactive. It means that this scheduler
+	should be used as leaf of CBQ or P3, which put interactive traffic
+	to higher priority band.
+
+	IMPLEMENTATION:
+	This implementation limits maximal queue length to 128;
+	maximal mtu to 2^15-1; number of hash buckets to 1024.
+	The only goal of these restrictions was that all data
+	fit into one 4K page :-). Struct soc_sched_data is
+	organized in anti-cache manner: all the data for a bucket
+	are scattered over different locations. This is not good,
+	but it allowed me to put it into 4K.
+
+	It is easy to increase these values, but not in flight.  */
+
+/*
+ * SFQ_DEPTH is the number of flow slots. The value SFQ_DEPTH itself is
+ * also used as the "no slot" marker in ht[] and in q->tail.
+ * SFQ_HASH_DIVISOR is the number of hash buckets; soc_hash() masks with
+ * SFQ_HASH_DIVISOR - 1, so it must be a power of two.
+ */
+#define SFQ_DEPTH		128
+#define SFQ_HASH_DIVISOR	1024
+
+/* This type should contain at least SFQ_DEPTH*2 values */
+typedef unsigned char soc_index;
+
+/* Links of one node in the per-depth doubly linked lists (see dep[]). */
+struct soc_head
+{
+	soc_index	next;
+	soc_index	prev;
+};
+
+/* Private per-qdisc state. */
+struct soc_sched_data
+{
+/* Parameters */
+	int		hashf;		/* 0: hash saddr, else daddr */
+	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
+	int		limit;		/* Maximal number of queued packets */
+
+/* Variables */
+	struct tcf_proto *filter_list;
+	soc_index	tail;		/* Index of current slot in round */
+	soc_index	max_depth;	/* Maximal depth */
+
+	soc_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
+	soc_index	next[SFQ_DEPTH];	/* Active slots link */
+	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
+	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
+	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
+	struct soc_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+};
+
+/*
+ * Map a packet to a hash bucket in [0, SFQ_HASH_DIVISOR).
+ *
+ * For IPv4 the bucket is derived from the source address (hashf == 0) or
+ * the destination address (hashf != 0); for IPv6 the last 32 bits of the
+ * respective address are used. Other protocols are keyed on the value of
+ * the skb_dst() pointer.
+ */
+static unsigned soc_hash(struct soc_sched_data *q, struct sk_buff *skb)
+{
+	u32 h;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP): {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		h = q->hashf ? iph->daddr : iph->saddr;
+		break;
+	}
+	case htons(ETH_P_IPV6): {
+		const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+		h = q->hashf ? iph->daddr.s6_addr32[3] :
+			       iph->saddr.s6_addr32[3];
+		break;
+	}
+	default:
+		h = (u32)(unsigned long)skb_dst(skb);
+		break;
+	}
+
+	/* Low-order bits of the host-order address pick the bucket. */
+	return htonl(h) & (SFQ_HASH_DIVISOR - 1);
+}
+
+/*
+ * Select the bucket for a packet.
+ *
+ * Returns a bucket number in 1..SFQ_HASH_DIVISOR, or 0 if the packet must
+ * not be queued; in that case *qerr holds the NET_XMIT_* code to return.
+ * *qerr is only written when attached filters are consulted.
+ */
+static unsigned int soc_classify(struct sk_buff *skb, struct Qdisc *sch,
+				 int *qerr)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+	struct tcf_result res;
+	int result;
+
+	/* skb->priority may name one of our buckets directly. */
+	if (TC_H_MAJ(skb->priority) == sch->handle &&
+	    TC_H_MIN(skb->priority) > 0 &&
+	    TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
+		return TC_H_MIN(skb->priority);
+
+	if (!q->filter_list)
+		return soc_hash(q, skb) + 1;
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	result = tc_classify(skb, q->filter_list, &res);
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
+		case TC_ACT_SHOT:
+			return 0;
+		}
+#endif
+		if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
+			return TC_H_MIN(res.classid);
+	}
+	return 0;
+}
+
+/* Put slot x at the head of the list for its current queue length. */
+static inline void soc_link(struct soc_sched_data *q, soc_index x)
+{
+	soc_index p, n;
+	int d = q->qs[x].qlen + SFQ_DEPTH;
+
+	p = d;
+	n = q->dep[d].next;
+	q->dep[x].next = n;
+	q->dep[x].prev = p;
+	q->dep[p].next = q->dep[n].prev = x;
+}
+
+/*
+ * Re-link slot x after one packet was removed from qs[x], lowering
+ * max_depth if x was the only slot at the maximal depth.
+ */
+static inline void soc_dec(struct soc_sched_data *q, soc_index x)
+{
+	soc_index p, n;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+
+	if (n == p && q->max_depth == q->qs[x].qlen + 1)
+		q->max_depth--;
+
+	soc_link(q, x);
+}
+
+/*
+ * Re-link slot x after one packet was added to qs[x], raising max_depth
+ * if the slot is now the deepest one.
+ */
+static inline void soc_inc(struct soc_sched_data *q, soc_index x)
+{
+	soc_index p, n;
+	int d;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+	d = q->qs[x].qlen;
+	if (q->max_depth < d)
+		q->max_depth = d;
+
+	soc_link(q, x);
+}
+
+/*
+ * Drop one packet from the tail of the longest slot.
+ *
+ * Returns the length of the dropped packet, or 0 if max_depth is 0.
+ */
+static unsigned int soc_drop(struct Qdisc *sch)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+	soc_index d = q->max_depth;
+	struct sk_buff *skb;
+	unsigned int len;
+
+	/* Queue is full! Find the longest slot and
+	   drop a packet from it */
+
+	if (d > 1) {
+		soc_index x = q->dep[d + SFQ_DEPTH].next;
+		skb = q->qs[x].prev;
+		len = qdisc_pkt_len(skb);
+		__skb_unlink(skb, &q->qs[x]);
+		kfree_skb(skb);
+		soc_dec(q, x);
+		sch->q.qlen--;
+		sch->qstats.drops++;
+		sch->qstats.backlog -= len;
+		return len;
+	}
+
+	if (d == 1) {
+		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
+		d = q->next[q->tail];
+		q->next[q->tail] = q->next[d];
+		q->allot[q->next[d]] += q->quantum;
+		skb = q->qs[d].prev;
+		len = qdisc_pkt_len(skb);
+		__skb_unlink(skb, &q->qs[d]);
+		kfree_skb(skb);
+		soc_dec(q, d);
+		sch->q.qlen--;
+		q->ht[q->hash[d]] = SFQ_DEPTH;
+		sch->qstats.drops++;
+		sch->qstats.backlog -= len;
+		return len;
+	}
+
+	return 0;
+}
+
+/*
+ * Queue skb on the slot of its bucket. If this pushes the qdisc over
+ * q->limit, one packet is dropped from the longest slot and NET_XMIT_CN
+ * is returned.
+ */
+static int
+soc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+	unsigned int hash;
+	soc_index x;
+	int uninitialized_var(ret);
+
+	hash = soc_classify(skb, sch, &ret);
+	if (hash == 0) {
+		if (ret & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+	hash--;
+
+	x = q->ht[hash];
+	if (x == SFQ_DEPTH) {
+		/* Bucket has no slot yet: take one from the empty list. */
+		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
+		q->hash[x] = hash;
+	}
+
+	/* If selected queue has length q->limit, this means that
+	 * all other queues are empty and that we do simple tail drop,
+	 * i.e. drop _this_ packet.
+	 */
+	if (q->qs[x].qlen >= q->limit)
+		return qdisc_drop(skb, sch);
+
+	sch->qstats.backlog += qdisc_pkt_len(skb);
+	__skb_queue_tail(&q->qs[x], skb);
+	soc_inc(q, x);
+	if (q->qs[x].qlen == 1) {		/* The flow is new */
+		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
+			q->tail = x;
+			q->next[x] = x;
+			q->allot[x] = q->quantum;
+		} else {
+			q->next[x] = q->next[q->tail];
+			q->next[q->tail] = x;
+			q->tail = x;
+		}
+	}
+	if (++sch->q.qlen <= q->limit) {
+		sch->bstats.bytes += qdisc_pkt_len(skb);
+		sch->bstats.packets++;
+		return NET_XMIT_SUCCESS;
+	}
+
+	soc_drop(sch);
+	return NET_XMIT_CN;
+}
+
+/* Return the next packet to be dequeued without removing it. */
+static struct sk_buff *
+soc_peek(struct Qdisc *sch)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+	soc_index a;
+
+	/* No active slots */
+	if (q->tail == SFQ_DEPTH)
+		return NULL;
+
+	a = q->next[q->tail];
+	return skb_peek(&q->qs[a]);
+}
+
+/*
+ * Dequeue the head packet of the current slot and move the round on to
+ * the next active slot once the current one is empty or has used up its
+ * allotment. Returns NULL when no slot is active.
+ */
+static struct sk_buff *
+soc_dequeue(struct Qdisc *sch)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	soc_index a, old_a;
+
+	/* No active slots */
+	if (q->tail == SFQ_DEPTH)
+		return NULL;
+
+	a = old_a = q->next[q->tail];
+
+	/* Grab packet */
+	skb = __skb_dequeue(&q->qs[a]);
+	soc_dec(q, a);
+	sch->q.qlen--;
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+
+	/* Is the slot empty? */
+	if (q->qs[a].qlen == 0) {
+		q->ht[q->hash[a]] = SFQ_DEPTH;
+		a = q->next[a];
+		if (a == old_a) {
+			q->tail = SFQ_DEPTH;
+			return skb;
+		}
+		q->next[q->tail] = a;
+		q->allot[a] += q->quantum;
+	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
+		q->tail = a;
+		a = q->next[a];
+		q->allot[a] += q->quantum;
+	}
+	return skb;
+}
+
+/* Free all queued packets by draining them through soc_dequeue(). */
+static void
+soc_reset(struct Qdisc *sch)
+{
+	struct sk_buff *skb;
+
+	while ((skb = soc_dequeue(sch)) != NULL)
+		kfree_skb(skb);
+}
+
+/*
+ * Apply the parameters of a struct tc_soc_qopt netlink attribute.
+ *
+ * A zero quantum selects psched_mtu() of the device, a zero limit keeps
+ * the current limit, and limit is capped at SFQ_DEPTH - 1. Packets above
+ * the new limit are dropped. Returns 0, or -EINVAL for a short attribute.
+ *
+ * NOTE(review): quantum is not range-checked although allot[] is a short.
+ */
+static int soc_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+	struct tc_soc_qopt *ctl = nla_data(opt);
+	unsigned int qlen;
+
+	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
+		return -EINVAL;
+
+	sch_tree_lock(sch);
+	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
+	if (ctl->limit)
+		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
+	q->hashf = ctl->hashf;
+
+	qlen = sch->q.qlen;
+	while (sch->q.qlen > q->limit)
+		soc_drop(sch);
+	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+/*
+ * Set up an empty qdisc: no bucket owns a slot, every slot is empty and
+ * linked on the depth-0 list, and the limit defaults to SFQ_DEPTH - 1.
+ * Optional parameters in opt are applied through soc_change().
+ */
+static int soc_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
+		q->ht[i] = SFQ_DEPTH;
+
+	for (i = 0; i < SFQ_DEPTH; i++) {
+		skb_queue_head_init(&q->qs[i]);
+		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
+		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
+	}
+
+	q->limit = SFQ_DEPTH - 1;
+	q->max_depth = 0;
+	q->tail = SFQ_DEPTH;
+	if (opt == NULL) {
+		q->quantum = psched_mtu(qdisc_dev(sch));
+	} else {
+		int err = soc_change(sch, opt);
+		if (err)
+			return err;
+	}
+
+	for (i = 0; i < SFQ_DEPTH; i++)
+		soc_link(q, i);
+	return 0;
+}
+
+/* Release the filter chain attached to this qdisc. */
+static void soc_destroy(struct Qdisc *sch)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+
+	tcf_destroy_chain(&q->filter_list);
+}
+
+/*
+ * Put the current parameters into skb as a TCA_OPTIONS attribute.
+ * Returns skb->len on success or -1 if NLA_PUT() fails.
+ */
+static int soc_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tc_soc_qopt opt = {
+		.quantum	= q->quantum,
+		.hashf		= q->hashf,
+		.limit		= q->limit,
+		.divisor	= SFQ_HASH_DIVISOR,
+		.flows		= q->limit,
+	};
+
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+/* Class lookup is not supported; always returns 0. */
+static unsigned long soc_get(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+/* Return the qdisc's filter chain, or NULL for any class (cl != 0). */
+static struct tcf_proto **soc_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+/* Report class cl (bucket number + 1) as the minor part of the handle. */
+static int soc_dump_class(struct Qdisc *sch, unsigned long cl,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+/*
+ * Report queue length and allotment of the slot behind class cl.
+ *
+ * cl is a bucket number + 1, as passed by soc_walk(). A bucket without a
+ * slot maps to SFQ_DEPTH, which is not a valid index into qs[] or
+ * allot[]; zeroed statistics are reported for it and for an out-of-range
+ * cl instead of reading past the arrays.
+ */
+static int soc_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				struct gnet_dump *d)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+	struct gnet_stats_queue qs = { 0 };
+	struct tc_soc_xstats xstats = { 0 };
+	soc_index idx = SFQ_DEPTH;
+
+	if (cl >= 1 && cl <= SFQ_HASH_DIVISOR)
+		idx = q->ht[cl - 1];
+
+	if (idx != SFQ_DEPTH) {
+		qs.qlen = q->qs[idx].qlen;
+		xstats.allot = q->allot[idx];
+	}
+
+	if (gnet_stats_copy_queue(d, &qs) < 0)
+		return -1;
+	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
+}
+
+/*
+ * Call arg->fn for every bucket that currently owns a slot, honouring
+ * arg->skip and stopping as soon as the callback returns a negative value.
+ */
+static void soc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct soc_sched_data *q = qdisc_priv(sch);
+	unsigned int i;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
+		if (q->ht[i] == SFQ_DEPTH ||
+		    arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, i + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+/* Classes have no child qdisc. */
+static struct Qdisc *soc_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+/* bind_tcf callback: clears TCQ_F_CAN_BYPASS and returns 0. */
+static unsigned long soc_bind(struct Qdisc *sch, unsigned long parent,
+			      u32 classid)
+{
+	/* we cannot bypass queue discipline anymore */
+	sch->flags &= ~TCQ_F_CAN_BYPASS;
+	return 0;
+}
+
+/* Nothing to release; used for both .put and .unbind_tcf. */
+static void soc_put(struct Qdisc *q, unsigned long cl)
+{
+}
+
+
+static const struct Qdisc_class_ops soc_class_ops = {
+	.leaf		=	soc_leaf,
+	.get		=	soc_get,
+	.put		=	soc_put,
+	.tcf_chain	=	soc_find_tcf,
+	.bind_tcf	=	soc_bind,
+	.unbind_tcf	=	soc_put,
+	.dump		=	soc_dump_class,
+	.dump_stats	=	soc_dump_class_stats,
+	.walk		=	soc_walk,
+};
+
+static struct Qdisc_ops soc_qdisc_ops __read_mostly = {
+	.cl_ops		=	&soc_class_ops,
+	.id		=	"soc",
+	.priv_size	=	sizeof(struct soc_sched_data),
+	.enqueue	=	soc_enqueue,
+	.dequeue	=	soc_dequeue,
+	.peek		=	soc_peek,
+	.drop		=	soc_drop,
+	.init		=	soc_init,
+	.reset		=	soc_reset,
+	.destroy	=	soc_destroy,
+	.change		=	NULL,	/* set up via soc_init() only */
+	.dump		=	soc_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init soc_module_init(void)
+{
+	return register_qdisc(&soc_qdisc_ops);
+}
+static void __exit soc_module_exit(void)
+{
+	unregister_qdisc(&soc_qdisc_ops);
+}
+module_init(soc_module_init)
+module_exit(soc_module_exit)
+MODULE_LICENSE("GPL");
-- 
1.7.6.4