/l3/users/gw.local/root :1 :2 :3 :4 :5 :6 :7 :8 :9 :10 :11 :12 :13 :14 :15 :16 :17 :18 :19 |
|
#make
HOSTCC scripts/basic/fixdep HOSTCC scripts/basic/docproc HOSTCC scripts/basic/hash HOSTCC scripts/kconfig/conf.o HOSTCC scripts/kconfig/kxgettext.o SHIPPED scripts/kconfig/zconf.tab.c SHIPPED scripts/kconfig/lex.zconf.c SHIPPED scripts/kconfig/zconf.hash.c HOSTCC scripts/kconfig/zconf.tab.o HOSTLD scripts/kconfig/conf ... CC [M] net/sched/sch_dsmark.o CC [M] net/sched/sch_sfq.o CC [M] net/sched/sch_esfq.o net/sched/sch_esfq.c: In function ‘esfq_hash’: net/sched/sch_esfq.c:177: error: ‘struct sk_buff’ has no member named ‘dst’ net/sched/sch_esfq.c: In function ‘esfq_dump’: net/sched/sch_esfq.c:652: warning: initialization makes pointer from integer without a cast make[2]: *** [net/sched/sch_esfq.o] Ошибка 1 make[1]: *** [net/sched] Ошибка 2 make: *** [net] Ошибка 2 |
#cd /var/downloads/
|
#tar zxvf ezshaper-1.1rc.tar.gz
ezshaper-1.1rc/ ezshaper-1.1rc/ezshaper.cfg ezshaper-1.1rc/ezshaper.sh ezshaper-1.1rc/ezshaper-1.1rc_err.txt ezshaper-1.1rc/ezshaper-1.1rc_loop.txt |
#cd ezshaper-1.1rc/
|
#less ezshaper-1.1rc_loop.txt
|
#less ezshaper.cfg
|
#man patch
|
#patch -p1 -b <../
.config.2.6.32.3 linux-2.6.32.3.tar.bz2 esfq.patch linux-2.6.32-imq-test2.diff esfq-patch-2.6.26 linux-2.6.32-imq-test2.diff.bz2 iptables-1.4.3.2-imq.patch linux-source-2.6.26/ iptables-1.4.3.2-imq_xt.patch linux-source-2.6.26-copy.tar.bz2 linux-2.6.32.1.tar.bz2 linux-source-2.6.26.tar.bz2 linux-2.6.32.3/ sched_esfq-2.6.29.patch |
#patch -p1 -b <../esfq-patch-2.6.26
patching file include/linux/pkt_cls.h Hunk #1 succeeded at 381 with fuzz 2 (offset 53 lines). (Patch is indented 3 spaces.) patching file net/sched/Kconfig patch: **** malformed patch at line 82: config NET_EMATCH |
#vim ../esfq-patch-2.6.26
--- /tmp/l3-saved-4220.4327.16066 2010-02-26 14:24:27.000000000 +0000 +++ ../esfq-patch-2.6.26 2010-02-26 14:25:25.000000000 +0000 @@ -58,14 +58,14 @@ + /* Basic filter */ - enum - diff --git a/net/sched/Kconfig b/net/sched/Kconfig - index 87af7c9..bccf42b 100644 - --- a/net/sched/Kconfig - +++ b/net/sched/Kconfig - @@ -307,6 +307,17 @@ config NET_CLS_RSVP6 - To compile this code as a module, choose M here: the - module will be called cls_rsvp6. +enum +diff --git a/net/sched/Kconfig b/net/sched/Kconfig +index 87af7c9..bccf42b 100644 +--- a/net/sched/Kconfig ++++ b/net/sched/Kconfig +@@ -307,6 +307,17 @@ config NET_CLS_RSVP6 + To compile this code as a module, choose M here: the + module will be called cls_rsvp6. +config NET_CLS_FLOW + tristate "Flow classifier" @@ -80,8 +80,8 @@ + config NET_EMATCH - bool "Extended Matches" - select NET_CLS + bool "Extended Matches" + select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index 81ecbe8..1d2b0f7 100644 |
#patch -p1 -b <../esfq-patch-2.6.26
patching file include/linux/pkt_cls.h patch: **** malformed patch at line 61: enum |
#vim ../sched_esfq-2.6.29.patch
|
#cd ..
|
#vim esfq-maybe.patch
--- /tmp/l3-saved-4220.31389.7376 2010-02-26 14:35:31.000000000 +0000 +++ esfq-maybe.patch 2010-02-26 14:36:37.000000000 +0000 @@ -1,117 +1,3 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" - "http://www.w3.org/TR/html4/loose.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>[NET_SCHED 04/04]: Add flow classifier</title> - <link rel="stylesheet" href="main.css" media="screen"> - <link rel="contents" href="thrd2.html#60638"> - <link rel="index" href="mail2.html#60638"> - <link rel="prev" href="msg60637.html"> - <link rel="next" href="msg60717.html"> - <link rel="alternate" title="netdev RSS" href="http://www.mail-archive.com/netdev@vger.kernel.org/maillist.xml" TYPE="application/rss+xml"> -</head> -<body id="msg"> -<!--X-Body-Begin--> -<!--X-User-Header--> -<!--X-User-Header-End--> -<!--X-TopPNI--> -<!--htdig_noindex--> -<div id="msgcontainer"> - <a id="mail-archive-logo" href="/"></a> -<h1> - <span id="listlink"><a href="/netdev@vger.kernel.org/info.html">netdev</a></span> -</h1> - <form action="/search" method=get> - <ul id="topLinks"> - <li><span class="msglink"><a href="msg60637.html" accesskey="p" title="Earlier messages"><img src="/leftarrow.png" border="0" height="22" width="20" align="top" alt="left arrow"></a></span> <span class="threadlink"><a href="thrd2.html#60638" title="Messages by Thread" accesskey="c">Thread</a></span> <span class="msglink"><a href="msg60717.html" accesskey="n" title="Later messages"><img src="/rightarrow.png" border="0" height="22" width="20" align="top" alt="right arrow"></a></span></li> - <li><span class="msglink"><a href="msg60637.html" accesskey="b" title="Earlier messages"><img src="/leftarrow.png" border="0" height="22" width="20" align="top" alt="left arrow"></a></span> <span class="chronologicallink"><a href="mail2.html#60638" title="Messages by Date" accesskey="i">Date</a></span> <span class="msglink"><a href="msg60639.html" accesskey="f" title="Later messages"><img src="/rightarrow.png" border="0" height="22" width="20" align="top" alt="right arrow"></a></span></li> - <li> - <label> -<input type=text name="q" value="" size=25> -<input type="hidden" name="l" value="netdev@vger.kernel.org"> -<input type=submit value="Search"> -</label> - </li> - </ul> - </form> -</div> - -<div id="msgcontainer2"> - <div id="corner1"></div> - <div id="corner2"></div> - <div id="msgcontent"> -<!--/htdig_noindex--> - -<!--X-TopPNI-End--> -<!--X-MsgBody--> -<!--X-Subject-Header-Begin--> - <div class="msgHead"> - <h1><span class="subject">[NET_SCHED 04/04]: Add flow classifier</span></h1> - <p><span class="sender">Patrick McHardy</span><br> - <span class="date">Thu, 31 Jan 2008 10:13:10 -0800</span></p> - </div> -<!--X-Subject-Header-End--> -<!--X-Head-of-Message--> -<!--X-Head-of-Message-End--> -<!--X-Head-Body-Sep-Begin--> -<div class="msgBody"> -<!--X-Head-Body-Sep-End--> -<!--X-Body-of-Message--> -<pre>[NET_SCHED]: Add flow classifier - -Add new "flow" classifier, which is meant to extend the SFQ hashing -capabilities without hard-coding new hash functions and also allows -deterministic mappings of keys to classes, replacing some out of tree -iptables patches like IPCLASSIFY (maps IPs to classes), IPMARK (maps -IPs to marks, with fw filters to classes), ... - -Some examples: - -- Classic SFQ hash: - - tc filter add ... flow hash \ - keys src,dst,proto,proto-src,proto-dst divisor 1024 - -- Classic SFQ hash, but using information from conntrack to work properly in - combination with NAT: - - tc filter add ... flow hash \ - keys nfct-src,nfct-dst,proto,nfct-proto-src,nfct-proto-dst divisor 1024 - -- Map destination IPs of 192.168.0.0/24 to classids 1-257: - - tc filter add ... flow map \ - key dst addend -192.168.0.0 divisor 256 - -- alternatively: - - tc filter add ... flow map \ - key dst and 0xff - -- similar, but reverse ordered: - - tc filter add ... flow map \ - key dst and 0xff xor 0xff - -Perturbation is currently not supported because we can't reliable kill the -timer on destruction. - -Signed-off-by: Patrick McHardy <[EMAIL PROTECTED]> - ---- -commit 91a3a09ce63cba8df30ac42133a40dd64c0a7259 -tree 2572feb8ffd88e6abf9270d2137af2a4cf7f542a -parent 7a281f8ef334a35d699682315e9f80a3e006376c -author Patrick McHardy <[EMAIL PROTECTED]> Wed, 30 Jan 2008 21:59:31 +0100 -committer Patrick McHardy <[EMAIL PROTECTED]> Thu, 31 Jan 2008 18:52:56 +0100 - - include/linux/pkt_cls.h | 50 ++++ - net/sched/Kconfig | 11 + - net/sched/Makefile | 1 - net/sched/cls_flow.c | 660 +++++++++++++++++++++++++++++++++++++++++++++++ - 4 files changed, 722 insertions(+), 0 deletions(-) - diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 30b8571..1c1dba9 100644 --- a/include/linux/pkt_cls.h @@ -874,101 +760,3 @@ +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <[EMAIL PROTECTED]>"); +MODULE_DESCRIPTION("TC flow classifier"); --- -To unsubscribe from this list: send the line "unsubscribe netdev" in -the body of a message to [EMAIL PROTECTED] -More majordomo info at <a rel="nofollow" href="http://vger.kernel.org/majordomo-info.html">http://vger.kernel.org/majordomo-info.html</a> -</pre> -<!--X-Body-of-Message-End--> -<!--X-MsgBody-End--> -<!--X-Follow-Ups--> -</div> <!-- end of msgBody class --> - -<!--htdig_noindex--> - -<div id="adbox"> - -</div> - -<div class="tSliceList"> -<ul> -<li> -<span class="subject"><a name="60634" href="msg60634.html">[NET_SCHED 00/04]: External SFQ classifiers/flow classifier</a></span> -<span class="sender">Patrick McHardy</span> -<ul> -<li> -<span class="subject"><a name="60635" href="msg60635.html">[NET_SCHED 01/04]: Constify struct tcf_ext_map</a></span> -<span class="sender">Patrick McHardy</span> -<!-- noop --> -<li> -<span class="subject"><a name="60636" href="msg60636.html">[NET_SCHED 02/04]: sch_sfq: add support for external classifiers</a></span> -<span class="sender">Patrick McHardy</span> -<!-- noop --> -<li> -<span class="subject"><a name="60637" href="msg60637.html">[NET_SCHED 03/04]: sch_sfq: make internal queues visible as classes</a></span> -<span class="sender">Patrick McHardy</span> -<!-- noop --> -<li> -<div class="tSliceCur"> -<span class="subject">[NET_SCHED 04/04]: Add flow classifier</span> -<span class="sender">Patrick McHardy</span> -</div> <!-- end of tSliceCur class --> -<!-- noop --> -<li> -<span class="subject"><a name="60717" href="msg60717.html">Re: [NET_SCHED 00/04]: External SFQ classifiers/flow classifier</a></span> -<span class="sender">David Miller</span> -<!-- noop --> -<li> -<span class="subject"><a name="60884" href="msg60884.html">Re: [NET_SCHED 00/04]: External SFQ classifiers/flow classifier</a></span> -<span class="sender">Corey Hickey</span> -<ul> -<li> -<span class="subject"><a name="60942" href="msg60942.html">Re: [NET_SCHED 00/04]: External SFQ classifiers/flow classifier</a></span> -<span class="sender">Patrick McHardy</span> -<ul> -<li> -<span class="subject"><a name="60948" href="msg60948.html">Re: [NET_SCHED 00/04]: External SFQ classifiers/flow classifier</a></span> -<span class="sender">Corey Hickey</span> -<!-- noop --> -</ul> -<!-- noop --> -</ul> -<!-- noop --> -</ul> -</li> -</ul> - -</div> <!-- end of tSliceList class --> -<!--/htdig_noindex--> -<!--X-Follow-Ups-End--> -<!--X-References--> -<!--X-References-End--> -<!--X-BotPNI--> -<!--htdig_noindex--> - <ul id="botLinks"> - <li><div class="reply"> -<FORM METHOD="POST" ACTION="/mailto.php"> - - -<INPUT TYPE="HIDDEN" NAME="subject" VALUE="[NET_SCHED 04/04]: Add flow classifier"> -<INPUT TYPE="HIDDEN" NAME="msgid" VALUE="20080131175804.25151.52004.sendpatchset@localhost.localdomain"> -Reply via email to<br> -<INPUT TYPE="SUBMIT" VALUE=" Patrick McHardy "> -</FORM> -</div> - </li> - </ul> -<!--/htdig_noindex--> - -<!--X-BotPNI-End--> -<!--X-User-Footer--> -<!--X-User-Footer-End--> -</div> <!-- end of msgcontent class --> -</div> <!-- end of msgcontainer2 class --> - -<!-- These extra divs/spans may be used as catch-alls to add extra imagery. --> -<div id="extraDiv1"><span></span></div><div id="extraDiv2"><span></span></div><div id="extraDiv3"><span></span></div> -<div id="extraDiv4"><span></span></div><div id="extraDiv5"><span></span></div><div id="extraDiv6"><span></span></div> - -</body> -</html> \ В конце файла нет новой строки |
#cd linux-2.6.32.3/
|
#cd ..
patch -p1 -b <../esfq-maybe.patch patching file include/linux/pkt_cls.h Hunk #1 succeeded at 431 with fuzz 2 (offset 103 lines). patching file net/sched/Kconfig Hunk #1 succeeded at 335 with fuzz 2 (offset 28 lines). patching file net/sched/Makefile Hunk #1 FAILED at 35. 1 out of 1 hunk FAILED -- saving rejects to file net/sched/Makefile.rej The next patch would create the file net/sched/cls_flow.c, which already exists! Assume -R? [n] y patching file net/sched/cls_flow.c patch: **** malformed patch at line 256: 2)); |
#cd ..
|
#cd ..
rm -R linux-2.6.32.3 |
#cd ..
tar jxvf linux-2.6.32.3.tar.bz2 linux-2.6.32.3/ linux-2.6.32.3/.gitignore linux-2.6.32.3/.mailmap linux-2.6.32.3/COPYING linux-2.6.32.3/CREDITS linux-2.6.32.3/Documentation/ linux-2.6.32.3/Documentation/00-INDEX linux-2.6.32.3/Documentation/ABI/ linux-2.6.32.3/Documentation/ABI/README ... linux-2.6.32.3/virt/kvm/Kconfig linux-2.6.32.3/virt/kvm/coalesced_mmio.c linux-2.6.32.3/virt/kvm/coalesced_mmio.h linux-2.6.32.3/virt/kvm/eventfd.c linux-2.6.32.3/virt/kvm/ioapic.c linux-2.6.32.3/virt/kvm/ioapic.h linux-2.6.32.3/virt/kvm/iodev.h linux-2.6.32.3/virt/kvm/iommu.c linux-2.6.32.3/virt/kvm/irq_comm.c linux-2.6.32.3/virt/kvm/kvm_main.c |
#cp ./.config.2.6.32.3 linux-2.6.32.3/.config
|
#cd linux-2.6.32.3/
|
#cd ..
patch -p1 -b <../linux-2.6.32-imq-test2.diff patching file drivers/net/imq.c patching file drivers/net/Kconfig patching file drivers/net/Makefile patching file include/linux/imq.h patching file include/linux/netdevice.h patching file include/linux/netfilter/xt_IMQ.h patching file include/linux/netfilter_ipv4/ipt_IMQ.h patching file include/linux/netfilter_ipv6/ip6t_IMQ.h patching file include/linux/skbuff.h patching file include/net/netfilter/nf_queue.h patching file net/core/dev.c patching file net/core/skbuff.c patching file net/netfilter/Kconfig patching file net/netfilter/Makefile patching file net/netfilter/nf_queue.c patching file net/netfilter/xt_IMQ.c |
#patch -p1 -b <../esfq-maybe.patch
patching file include/linux/pkt_cls.h Hunk #1 succeeded at 381 with fuzz 2 (offset 53 lines). patching file net/sched/Kconfig Hunk #1 succeeded at 335 with fuzz 2 (offset 28 lines). patching file net/sched/Makefile Hunk #1 FAILED at 35. 1 out of 1 hunk FAILED -- saving rejects to file net/sched/Makefile.rej The next patch would create the file net/sched/cls_flow.c, which already exists! Assume -R? [n] Apply anyway? [n] y patching file net/sched/cls_flow.c patch: **** malformed patch at line 256: 2)); |
#vim ../esfq-maybe.patch
--- /tmp/l3-saved-4220.24524.791 2010-02-26 14:38:56.000000000 +0000 +++ ../esfq-maybe.patch 2010-02-26 14:39:12.000000000 +0000 @@ -252,8 +252,7 @@ + + if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && + has_ports(iph->protocol)) -+ res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + -2)); ++ res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); + break; + } + case __constant_htons(ETH_P_IPV6): { |
#patch -p1 -b <../esfq-maybe.patch
patching file include/linux/pkt_cls.h Hunk #1 succeeded at 431 with fuzz 2 (offset 103 lines). patching file net/sched/Kconfig Hunk #1 succeeded at 346 with fuzz 2 (offset 39 lines). patching file net/sched/Makefile Hunk #1 FAILED at 35. 1 out of 1 hunk FAILED -- saving rejects to file net/sched/Makefile.rej The next patch would create the file net/sched/cls_flow.c, which already exists! Assume -R? [n] Apply anyway? [n] y patching file net/sched/cls_flow.c Patch attempted to create file net/sched/cls_flow.c, which already exists. Hunk #1 FAILED at 1. 1 out of 1 hunk FAILED -- saving rejects to file net/sched/cls_flow.c.rej |
#make
HOSTCC scripts/basic/fixdep HOSTCC scripts/basic/docproc HOSTCC scripts/basic/hash HOSTCC scripts/kconfig/conf.o HOSTCC scripts/kconfig/kxgettext.o SHIPPED scripts/kconfig/zconf.tab.c SHIPPED scripts/kconfig/lex.zconf.c SHIPPED scripts/kconfig/zconf.hash.c HOSTCC scripts/kconfig/zconf.tab.o HOSTLD scripts/kconfig/conf scripts/kconfig/conf -s arch/x86/Kconfig net/sched/Kconfig:339: invalid option net/sched/Kconfig:350: invalid option make[2]: *** [silentoldconfig] Ошибка 1 make[1]: *** [silentoldconfig] Ошибка 2 make: *** Нет правила для сборки цели `include/config/auto.conf', требуемой для `include/config/kernel.release'. Останов. |
#cd ..
|
#iptables -V
iptables v1.4.2 |
#vim esfq-mypatch
--- /dev/null 2010-02-26 12:46:03.368409969 +0000 +++ esfq-mypatch 2010-02-26 15:08:34.000000000 +0000 @@ -0,0 +1,796 @@ +--- a/include/linux/pkt_sched.h ++++ b/include/linux/pkt_sched.h +@@ -182,8 +182,37 @@ + * + * The only reason for this is efficiency, it is possible + * to change these parameters in compile time. ++ * ++ * If you need to play with these values, use esfq instead. + */ + ++/* ESFQ section */ ++ ++enum ++{ ++ /* traditional */ ++ TCA_SFQ_HASH_CLASSIC, ++ TCA_SFQ_HASH_DST, ++ TCA_SFQ_HASH_SRC, ++ TCA_SFQ_HASH_FWMARK, ++ /* conntrack */ ++ TCA_SFQ_HASH_CTORIGDST, ++ TCA_SFQ_HASH_CTORIGSRC, ++ TCA_SFQ_HASH_CTREPLDST, ++ TCA_SFQ_HASH_CTREPLSRC, ++ TCA_SFQ_HASH_CTNATCHG, ++}; ++ ++struct tc_esfq_qopt ++{ ++ unsigned quantum; /* Bytes per round allocated to flow */ ++ int perturb_period; /* Period of hash perturbation */ ++ __u32 limit; /* Maximal packets in queue */ ++ unsigned divisor; /* Hash divisor */ ++ unsigned flows; /* Maximal number of flows */ ++ unsigned hash_kind; /* Hash function to use for flow identification */ ++}; ++ + /* RED section */ + + enum +-- a/net/sched/Kconfig ++++ b/net/sched/Kconfig +@@ -137,6 +137,37 @@ + To compile this code as a module, choose M here: the + module will be called sch_sfq. + ++config NET_SCH_ESFQ ++ tristate "Enhanced Stochastic Fairness Queueing (ESFQ)" ++ ---help--- ++ Say Y here if you want to use the Enhanced Stochastic Fairness ++ Queueing (ESFQ) packet scheduling algorithm for some of your network ++ devices or as a leaf discipline for a classful qdisc such as HTB or ++ CBQ (see the top of <file:net/sched/sch_esfq.c> for details and ++ references to the SFQ algorithm). ++ ++ This is an enchanced SFQ version which allows you to control some ++ hardcoded values in the SFQ scheduler. ++ ++ ESFQ also adds control of the hash function used to identify packet ++ flows. The original SFQ discipline hashes by connection; ESFQ add ++ several other hashing methods, such as by src IP or by dst IP, which ++ can be more fair to users in some networking situations. ++ ++ To compile this code as a module, choose M here: the ++ module will be called sch_esfq. ++ ++config NET_SCH_ESFQ_NFCT ++ bool "Connection Tracking Hash Types" ++ depends on NET_SCH_ESFQ && NF_CONNTRACK ++ ---help--- ++ Say Y here to enable support for hashing based on netfilter connection ++ tracking information. This is useful for a router that is also using ++ NAT to connect privately-addressed hosts to the Internet. If you want ++ to provide fair distribution of upstream bandwidth, ESFQ must use ++ connection tracking information, since all outgoing packets will share ++ the same source address. ++ + config NET_SCH_TEQL + tristate "True Link Equalizer (TEQL)" + ---help--- +--- a/net/sched/Makefile ++++ b/net/sched/Makefile +@@ -24,6 +24,7 @@ + obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o + obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o + obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o ++obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o + obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o + obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o + obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o +--- /dev/null ++++ b/net/sched/sch_esfq.c +@@ -0,0 +1,702 @@ ++/* ++ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> ++ * ++ * Changes: Alexander Atanasov, <alex@ssi.bg> ++ * Added dynamic depth,limit,divisor,hash_kind options. ++ * Added dst and src hashes. ++ * ++ * Alexander Clouter, <alex@digriz.org.uk> ++ * Ported ESFQ to Linux 2.6. ++ * ++ * Corey Hickey, <bugfood-c@fatooh.org> ++ * Maintenance of the Linux 2.6 port. ++ * Added fwmark hash (thanks to Robert Kurjata). ++ * Added usage of jhash. ++ * Added conntrack support. ++ * Added ctnatchg hash (thanks to Ben Pfountz). ++ */ ++ ++#include <linux/module.h> ++#include <asm/uaccess.h> ++#include <asm/system.h> ++#include <linux/bitops.h> ++#include <linux/types.h> ++#include <linux/kernel.h> ++#include <linux/jiffies.h> ++#include <linux/string.h> ++#include <linux/mm.h> ++#include <linux/socket.h> ++#include <linux/sockios.h> ++#include <linux/in.h> ++#include <linux/errno.h> ++#include <linux/interrupt.h> ++#include <linux/if_ether.h> ++#include <linux/inet.h> ++#include <linux/netdevice.h> ++#include <linux/etherdevice.h> ++#include <linux/notifier.h> ++#include <linux/init.h> ++#include <net/ip.h> ++#include <net/netlink.h> ++#include <linux/ipv6.h> ++#include <net/route.h> ++#include <linux/skbuff.h> ++#include <net/sock.h> ++#include <net/pkt_sched.h> ++#include <linux/jhash.h> ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++#include <net/netfilter/nf_conntrack.h> ++#endif ++ ++/* Stochastic Fairness Queuing algorithm. ++ For more comments look at sch_sfq.c. ++ The difference is that you can change limit, depth, ++ hash table size and choose alternate hash types. ++ ++ classic: same as in sch_sfq.c ++ dst: destination IP address ++ src: source IP address ++ fwmark: netfilter mark value ++ ctorigdst: original destination IP address ++ ctorigsrc: original source IP address ++ ctrepldst: reply destination IP address ++ ctreplsrc: reply source IP ++ ++*/ ++ ++#define ESFQ_HEAD 0 ++#define ESFQ_TAIL 1 ++ ++/* This type should contain at least SFQ_DEPTH*2 values */ ++typedef unsigned int esfq_index; ++ ++struct esfq_head ++{ ++ esfq_index next; ++ esfq_index prev; ++}; ++ ++struct esfq_sched_data ++{ ++/* Parameters */ ++ int perturb_period; ++ unsigned quantum; /* Allotment per round: MUST BE >= MTU */ ++ int limit; ++ unsigned depth; ++ unsigned hash_divisor; ++ unsigned hash_kind; ++/* Variables */ ++ struct timer_list perturb_timer; ++ int perturbation; ++ esfq_index tail; /* Index of current slot in round */ ++ esfq_index max_depth; /* Maximal depth */ ++ ++ esfq_index *ht; /* Hash table */ ++ esfq_index *next; /* Active slots link */ ++ short *allot; /* Current allotment per slot */ ++ unsigned short *hash; /* Hash value indexed by slots */ ++ struct sk_buff_head *qs; /* Slot queue */ ++ struct esfq_head *dep; /* Linked list of slots, indexed by depth */ ++}; ++ ++/* This contains the info we will hash. */ ++struct esfq_packet_info ++{ ++ u32 proto; /* protocol or port */ ++ u32 src; /* source from packet header */ ++ u32 dst; /* destination from packet header */ ++ u32 ctorigsrc; /* original source from conntrack */ ++ u32 ctorigdst; /* original destination from conntrack */ ++ u32 ctreplsrc; /* reply source from conntrack */ ++ u32 ctrepldst; /* reply destination from conntrack */ ++ u32 mark; /* netfilter mark (fwmark) */ ++}; ++ ++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) ++{ ++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) ++{ ++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) ++{ ++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) ++{ ++ struct esfq_packet_info info; ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ enum ip_conntrack_info ctinfo; ++ struct nf_conn *ct = nf_ct_get(skb, &ctinfo); ++#endif ++ ++ switch (skb->protocol) { ++ case __constant_htons(ETH_P_IP): ++ { ++ struct iphdr *iph = ip_hdr(skb); ++ info.dst = iph->daddr; ++ info.src = iph->saddr; ++ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && ++ (iph->protocol == IPPROTO_TCP || ++ iph->protocol == IPPROTO_UDP || ++ iph->protocol == IPPROTO_SCTP || ++ iph->protocol == IPPROTO_DCCP || ++ iph->protocol == IPPROTO_ESP)) ++ info.proto = *(((u32*)iph) + iph->ihl); ++ else ++ info.proto = iph->protocol; ++ break; ++ } ++ case __constant_htons(ETH_P_IPV6): ++ { ++ struct ipv6hdr *iph = ipv6_hdr(skb); ++ /* Hash ipv6 addresses into a u32. This isn't ideal, ++ * but the code is simple. */ ++ info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation); ++ info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation); ++ if (iph->nexthdr == IPPROTO_TCP || ++ iph->nexthdr == IPPROTO_UDP || ++ iph->nexthdr == IPPROTO_SCTP || ++ iph->nexthdr == IPPROTO_DCCP || ++ iph->nexthdr == IPPROTO_ESP) ++ info.proto = *(u32*)&iph[1]; ++ else ++ info.proto = iph->nexthdr; ++ break; ++ } ++ default: ++ info.dst = (u32)(unsigned long)skb_dst(skb); ++ info.src = (u32)(unsigned long)skb->sk; ++ info.proto = skb->protocol; ++ } ++ ++ info.mark = skb->mark; ++ ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ /* defaults if there is no conntrack info */ ++ info.ctorigsrc = info.src; ++ info.ctorigdst = info.dst; ++ info.ctreplsrc = info.dst; ++ info.ctrepldst = info.src; ++ /* collect conntrack info */ ++ if (ct && ct != &nf_conntrack_untracked) { ++ if (skb->protocol == __constant_htons(ETH_P_IP)) { ++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; ++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip; ++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; ++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip; ++ } ++ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { ++ /* Again, hash ipv6 addresses into a single u32. */ ++ info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation); ++ info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation); ++ } ++ ++ } ++#endif ++ ++ switch(q->hash_kind) { ++ case TCA_SFQ_HASH_CLASSIC: ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++ case TCA_SFQ_HASH_DST: ++ return esfq_jhash_1word(q, info.dst); ++ case TCA_SFQ_HASH_SRC: ++ return esfq_jhash_1word(q, info.src); ++ case TCA_SFQ_HASH_FWMARK: ++ return esfq_jhash_1word(q, info.mark); ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ case TCA_SFQ_HASH_CTORIGDST: ++ return esfq_jhash_1word(q, info.ctorigdst); ++ case TCA_SFQ_HASH_CTORIGSRC: ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ case TCA_SFQ_HASH_CTREPLDST: ++ return esfq_jhash_1word(q, info.ctrepldst); ++ case TCA_SFQ_HASH_CTREPLSRC: ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ case TCA_SFQ_HASH_CTNATCHG: ++ { ++ if (info.ctorigdst == info.ctreplsrc) ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ } ++#endif ++ default: ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n"); ++ } ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++} ++ ++static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d = q->qs[x].qlen + q->depth; ++ ++ p = d; ++ n = q->dep[d].next; ++ q->dep[x].next = n; ++ q->dep[x].prev = p; ++ q->dep[p].next = q->dep[n].prev = x; ++} ++ ++static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ ++ if (n == p && q->max_depth == q->qs[x].qlen + 1) ++ q->max_depth--; ++ ++ esfq_link(q, x); ++} ++ ++static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ d = q->qs[x].qlen; ++ if (q->max_depth < d) ++ q->max_depth = d; ++ ++ esfq_link(q, x); ++} ++ ++static unsigned int esfq_drop(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_index d = q->max_depth; ++ struct sk_buff *skb; ++ unsigned int len; ++ ++ /* Queue is full! Find the longest slot and ++ drop a packet from it */ ++ ++ if (d > 1) { ++ esfq_index x = q->dep[d+q->depth].next; ++ skb = q->qs[x].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[x]); ++ kfree_skb(skb); ++ esfq_dec(q, x); ++ sch->q.qlen--; ++ sch->qstats.drops++; ++ sch->qstats.backlog -= len; ++ return len; ++ } ++ ++ if (d == 1) { ++ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ ++ d = q->next[q->tail]; ++ q->next[q->tail] = q->next[d]; ++ q->allot[q->next[d]] += q->quantum; ++ skb = q->qs[d].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[d]); ++ kfree_skb(skb); ++ esfq_dec(q, d); ++ sch->q.qlen--; ++ q->ht[q->hash[d]] = q->depth; ++ sch->qstats.drops++; ++ sch->qstats.backlog -= len; ++ return len; ++ } ++ ++ return 0; ++} ++ ++static void esfq_q_enqueue(struct sk_buff *skb, struct esfq_sched_data *q, unsigned int end) ++{ ++ unsigned hash = esfq_hash(q, skb); ++ unsigned depth = q->depth; ++ esfq_index x; ++ ++ x = q->ht[hash]; ++ if (x == depth) { ++ q->ht[hash] = x = q->dep[depth].next; ++ q->hash[x] = hash; ++ } ++ ++ if (end == ESFQ_TAIL) ++ __skb_queue_tail(&q->qs[x], skb); ++ else ++ __skb_queue_head(&q->qs[x], skb); ++ ++ esfq_inc(q, x); ++ if (q->qs[x].qlen == 1) { /* The flow is new */ ++ if (q->tail == depth) { /* It is the first flow */ ++ q->tail = x; ++ q->next[x] = x; ++ q->allot[x] = q->quantum; ++ } else { ++ q->next[x] = q->next[q->tail]; ++ q->next[q->tail] = x; ++ q->tail = x; ++ } ++ } ++} ++ ++static int esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_enqueue(skb, q, ESFQ_TAIL); ++ sch->qstats.backlog += skb->len; ++ if (++sch->q.qlen < q->limit-1) { ++ sch->bstats.bytes += skb->len; ++ sch->bstats.packets++; ++ return 0; ++ } ++ ++ sch->qstats.drops++; ++ esfq_drop(sch); ++ return NET_XMIT_CN; ++} ++ ++static struct sk_buff *esfq_peek(struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_index a; ++ ++ /* No active slots */ ++ if (q->tail == q->depth) ++ return NULL; ++ ++ a = q->next[q->tail]; ++ return skb_peek(&q->qs[a]); ++} ++ ++static struct sk_buff *esfq_q_dequeue(struct esfq_sched_data *q) ++{ ++ struct sk_buff *skb; ++ unsigned depth = q->depth; ++ esfq_index a, old_a; ++ ++ /* No active slots */ ++ if (q->tail == depth) ++ return NULL; ++ ++ a = old_a = q->next[q->tail]; ++ ++ /* Grab packet */ ++ skb = __skb_dequeue(&q->qs[a]); ++ esfq_dec(q, a); ++ ++ /* Is the slot empty? */ ++ if (q->qs[a].qlen == 0) { ++ q->ht[q->hash[a]] = depth; ++ a = q->next[a]; ++ if (a == old_a) { ++ q->tail = depth; ++ return skb; ++ } ++ q->next[q->tail] = a; ++ q->allot[a] += q->quantum; ++ } else if ((q->allot[a] -= skb->len) <= 0) { ++ q->tail = a; ++ a = q->next[a]; ++ q->allot[a] += q->quantum; ++ } ++ ++ return skb; ++} ++ ++static struct sk_buff *esfq_dequeue(struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct sk_buff *skb; ++ ++ skb = esfq_q_dequeue(q); ++ if (skb == NULL) ++ return NULL; ++ sch->q.qlen--; ++ sch->qstats.backlog -= skb->len; ++ return skb; ++} ++ ++static void esfq_q_destroy(struct esfq_sched_data *q) ++{ ++ del_timer(&q->perturb_timer); ++ if(q->ht) ++ kfree(q->ht); ++ if(q->dep) ++ kfree(q->dep); ++ if(q->next) ++ kfree(q->next); ++ if(q->allot) ++ kfree(q->allot); ++ if(q->hash) ++ kfree(q->hash); ++ if(q->qs) ++ kfree(q->qs); ++} ++ ++static void esfq_destroy(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_destroy(q); ++} ++ ++ ++static void esfq_reset(struct Qdisc* sch) ++{ ++ struct sk_buff *skb; ++ ++ while ((skb = esfq_dequeue(sch)) != NULL) ++ kfree_skb(skb); ++} ++ ++static void esfq_perturbation(unsigned long arg) ++{ ++ struct Qdisc *sch = (struct Qdisc*)arg; ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ ++ q->perturbation = net_random()&0x1F; ++ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++} ++ ++static unsigned int esfq_check_hash(unsigned int kind) ++{ ++ switch (kind) { ++ case TCA_SFQ_HASH_CTORIGDST: ++ case TCA_SFQ_HASH_CTORIGSRC: ++ case TCA_SFQ_HASH_CTREPLDST: ++ case TCA_SFQ_HASH_CTREPLSRC: ++ case TCA_SFQ_HASH_CTNATCHG: ++#ifndef CONFIG_NET_SCH_ESFQ_NFCT ++ { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Conntrack hash types disabled in kernel config. Falling back to classic.\n"); ++ return TCA_SFQ_HASH_CLASSIC; ++ } ++#endif ++ case TCA_SFQ_HASH_CLASSIC: ++ case TCA_SFQ_HASH_DST: ++ case TCA_SFQ_HASH_SRC: ++ case TCA_SFQ_HASH_FWMARK: ++ return kind; ++ default: ++ { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash type. Falling back to classic.\n"); ++ return TCA_SFQ_HASH_CLASSIC; ++ } ++ } ++} ++ ++static int esfq_q_init(struct esfq_sched_data *q, struct nlattr *opt) ++{ ++ struct tc_esfq_qopt *ctl = nla_data(opt); ++ esfq_index p = ~0U/2; ++ int i; ++ ++ if (opt && opt->nla_len < nla_attr_size(sizeof(*ctl))) ++ return -EINVAL; ++ ++ q->perturbation = 0; ++ q->hash_kind = TCA_SFQ_HASH_CLASSIC; ++ q->max_depth = 0; ++ if (opt == NULL) { ++ q->perturb_period = 0; ++ q->hash_divisor = 1024; ++ q->tail = q->limit = q->depth = 128; ++ ++ } else { ++ struct tc_esfq_qopt *ctl = nla_data(opt); ++ if (ctl->quantum) ++ q->quantum = ctl->quantum; ++ q->perturb_period = ctl->perturb_period*HZ; ++ q->hash_divisor = ctl->divisor ? : 1024; ++ q->tail = q->limit = q->depth = ctl->flows ? : 128; ++ ++ if ( q->depth > p - 1 ) ++ return -EINVAL; ++ ++ if (ctl->limit) ++ q->limit = min_t(u32, ctl->limit, q->depth); ++ ++ if (ctl->hash_kind) { ++ q->hash_kind = esfq_check_hash(ctl->hash_kind); ++ } ++ } ++ ++ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->ht) ++ goto err_case; ++ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); ++ if (!q->dep) ++ goto err_case; ++ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->next) ++ goto err_case; ++ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); ++ if (!q->allot) ++ goto err_case; ++ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); ++ if (!q->hash) ++ goto err_case; ++ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); ++ if (!q->qs) ++ goto err_case; ++ ++ for (i=0; i< q->hash_divisor; i++) ++ q->ht[i] = q->depth; ++ for (i=0; i<q->depth; i++) { ++ skb_queue_head_init(&q->qs[i]); ++ q->dep[i+q->depth].next = i+q->depth; ++ q->dep[i+q->depth].prev = i+q->depth; ++ } ++ ++ for (i=0; i<q->depth; i++) ++ esfq_link(q, i); ++ return 0; ++err_case: ++ esfq_q_destroy(q); ++ return -ENOBUFS; ++} ++ ++static int esfq_init(struct Qdisc *sch, struct nlattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ int err; ++ ++ q->quantum = psched_mtu(qdisc_dev(sch)); /* default */ ++ if ((err = esfq_q_init(q, opt))) ++ return err; ++ ++ init_timer(&q->perturb_timer); ++ q->perturb_timer.data = (unsigned long)sch; ++ q->perturb_timer.function = esfq_perturbation; ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++ ++ return 0; ++} ++ ++static int esfq_change(struct Qdisc *sch, struct nlattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct esfq_sched_data new; ++ struct sk_buff *skb; ++ int err; ++ ++ /* set up new queue */ ++ memset(&new, 0, sizeof(struct esfq_sched_data)); ++ new.quantum = psched_mtu(qdisc_dev(sch)); /* default */ ++ if ((err = esfq_q_init(&new, opt))) ++ return err; ++ ++ /* copy all packets from the old queue to the new queue */ ++ sch_tree_lock(sch); ++ while ((skb = esfq_q_dequeue(q)) != NULL) ++ esfq_q_enqueue(skb, &new, ESFQ_TAIL); ++ ++ /* clean up the old queue */ ++ esfq_q_destroy(q); ++ ++ /* copy elements of the new queue into the old queue */ ++ q->perturb_period = new.perturb_period; ++ q->quantum = new.quantum; ++ q->limit = new.limit; ++ q->depth = new.depth; ++ q->hash_divisor = new.hash_divisor; ++ q->hash_kind = new.hash_kind; ++ q->tail = new.tail; ++ q->max_depth = new.max_depth; ++ q->ht = new.ht; ++ q->dep = new.dep; ++ q->next = new.next; ++ q->allot = new.allot; ++ q->hash = new.hash; ++ q->qs = new.qs; ++ ++ /* finish up */ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } else { ++ q->perturbation = 0; ++ } ++ sch_tree_unlock(sch); ++ return 0; ++} ++ ++static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ unsigned char *b = skb_tail_pointer(skb); ++ struct tc_esfq_qopt opt; ++ ++ opt.quantum = q->quantum; ++ opt.perturb_period = q->perturb_period/HZ; ++ ++ opt.limit = q->limit; ++ opt.divisor = q->hash_divisor; ++ opt.flows = q->depth; ++ opt.hash_kind = q->hash_kind; ++ ++ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); ++ ++ return skb->len; ++ ++nla_put_failure: ++ nlmsg_trim(skb, b); ++ return -1; ++} ++ ++static struct Qdisc_ops esfq_qdisc_ops = ++{ ++ .next = NULL, ++ .cl_ops = NULL, ++ .id = "esfq", ++ .priv_size = sizeof(struct esfq_sched_data), ++ .enqueue = esfq_enqueue, ++ .dequeue = esfq_dequeue, ++ .peek = esfq_peek, ++ .drop = esfq_drop, ++ .init = esfq_init, ++ .reset = esfq_reset, ++ .destroy = esfq_destroy, ++ .change = esfq_change, ++ .dump = esfq_dump, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init esfq_module_init(void) ++{ ++ return register_qdisc(&esfq_qdisc_ops); ++} ++static void __exit esfq_module_exit(void) ++{ ++ unregister_qdisc(&esfq_qdisc_ops); ++} ++module_init(esfq_module_init) ++module_exit(esfq_module_exit) ++MODULE_LICENSE("GPL"); + |
#vim 200-sched_esfq .patch
--- /dev/null 2010-02-26 12:46:03.368409969 +0000 +++ 200-sched_esfq 2010-02-26 15:13:05.000000000 +0000 @@ -0,0 +1,796 @@ +--- a/include/linux/pkt_sched.h ++++ b/include/linux/pkt_sched.h +@@ -182,8 +182,37 @@ struct tc_sfq_xstats + * + * The only reason for this is efficiency, it is possible + * to change these parameters in compile time. ++ * ++ * If you need to play with these values, use esfq instead. + */ + ++/* ESFQ section */ ++ ++enum ++{ ++ /* traditional */ ++ TCA_SFQ_HASH_CLASSIC, ++ TCA_SFQ_HASH_DST, ++ TCA_SFQ_HASH_SRC, ++ TCA_SFQ_HASH_FWMARK, ++ /* conntrack */ ++ TCA_SFQ_HASH_CTORIGDST, ++ TCA_SFQ_HASH_CTORIGSRC, ++ TCA_SFQ_HASH_CTREPLDST, ++ TCA_SFQ_HASH_CTREPLSRC, ++ TCA_SFQ_HASH_CTNATCHG, ++}; ++ ++struct tc_esfq_qopt ++{ ++ unsigned quantum; /* Bytes per round allocated to flow */ ++ int perturb_period; /* Period of hash perturbation */ ++ __u32 limit; /* Maximal packets in queue */ ++ unsigned divisor; /* Hash divisor */ ++ unsigned flows; /* Maximal number of flows */ ++ unsigned hash_kind; /* Hash function to use for flow identification */ ++}; ++ + /* RED section */ + + enum +--- a/net/sched/Kconfig ++++ b/net/sched/Kconfig +@@ -137,6 +137,37 @@ config NET_SCH_SFQ + To compile this code as a module, choose M here: the + module will be called sch_sfq. + ++config NET_SCH_ESFQ ++ tristate "Enhanced Stochastic Fairness Queueing (ESFQ)" ++ ---help--- ++ Say Y here if you want to use the Enhanced Stochastic Fairness ++ Queueing (ESFQ) packet scheduling algorithm for some of your network ++ devices or as a leaf discipline for a classful qdisc such as HTB or ++ CBQ (see the top of <file:net/sched/sch_esfq.c> for details and ++ references to the SFQ algorithm). ++ ++ This is an enchanced SFQ version which allows you to control some ++ hardcoded values in the SFQ scheduler. ++ ++ ESFQ also adds control of the hash function used to identify packet ++ flows. The original SFQ discipline hashes by connection; ESFQ add ++ several other hashing methods, such as by src IP or by dst IP, which ++ can be more fair to users in some networking situations. ++ ++ To compile this code as a module, choose M here: the ++ module will be called sch_esfq. ++ ++config NET_SCH_ESFQ_NFCT ++ bool "Connection Tracking Hash Types" ++ depends on NET_SCH_ESFQ && NF_CONNTRACK ++ ---help--- ++ Say Y here to enable support for hashing based on netfilter connection ++ tracking information. This is useful for a router that is also using ++ NAT to connect privately-addressed hosts to the Internet. If you want ++ to provide fair distribution of upstream bandwidth, ESFQ must use ++ connection tracking information, since all outgoing packets will share ++ the same source address. ++ + config NET_SCH_TEQL + tristate "True Link Equalizer (TEQL)" + ---help--- +--- a/net/sched/Makefile ++++ b/net/sched/Makefile +@@ -24,6 +24,7 @@ obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o + obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o + obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o + obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o ++obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o + obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o + obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o + obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o +--- /dev/null ++++ b/net/sched/sch_esfq.c +@@ -0,0 +1,702 @@ ++/* ++ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> ++ * ++ * Changes: Alexander Atanasov, <alex@ssi.bg> ++ * Added dynamic depth,limit,divisor,hash_kind options. ++ * Added dst and src hashes. ++ * ++ * Alexander Clouter, <alex@digriz.org.uk> ++ * Ported ESFQ to Linux 2.6. ++ * ++ * Corey Hickey, <bugfood-c@fatooh.org> ++ * Maintenance of the Linux 2.6 port. ++ * Added fwmark hash (thanks to Robert Kurjata). ++ * Added usage of jhash. ++ * Added conntrack support. ++ * Added ctnatchg hash (thanks to Ben Pfountz). ++ */ ++ ++#include <linux/module.h> ++#include <asm/uaccess.h> ++#include <asm/system.h> ++#include <linux/bitops.h> ++#include <linux/types.h> ++#include <linux/kernel.h> ++#include <linux/jiffies.h> ++#include <linux/string.h> ++#include <linux/mm.h> ++#include <linux/socket.h> ++#include <linux/sockios.h> ++#include <linux/in.h> ++#include <linux/errno.h> ++#include <linux/interrupt.h> ++#include <linux/if_ether.h> ++#include <linux/inet.h> ++#include <linux/netdevice.h> ++#include <linux/etherdevice.h> ++#include <linux/notifier.h> ++#include <linux/init.h> ++#include <net/ip.h> ++#include <net/netlink.h> ++#include <linux/ipv6.h> ++#include <net/route.h> ++#include <linux/skbuff.h> ++#include <net/sock.h> ++#include <net/pkt_sched.h> ++#include <linux/jhash.h> ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++#include <net/netfilter/nf_conntrack.h> ++#endif ++ ++/* Stochastic Fairness Queuing algorithm. ++ For more comments look at sch_sfq.c. ++ The difference is that you can change limit, depth, ++ hash table size and choose alternate hash types. ++ ++ classic: same as in sch_sfq.c ++ dst: destination IP address ++ src: source IP address ++ fwmark: netfilter mark value ++ ctorigdst: original destination IP address ++ ctorigsrc: original source IP address ++ ctrepldst: reply destination IP address ++ ctreplsrc: reply source IP ++ ++*/ ++ ++#define ESFQ_HEAD 0 ++#define ESFQ_TAIL 1 ++ ++/* This type should contain at least SFQ_DEPTH*2 values */ ++typedef unsigned int esfq_index; ++ ++struct esfq_head ++{ ++ esfq_index next; ++ esfq_index prev; ++}; ++ ++struct esfq_sched_data ++{ ++/* Parameters */ ++ int perturb_period; ++ unsigned quantum; /* Allotment per round: MUST BE >= MTU */ ++ int limit; ++ unsigned depth; ++ unsigned hash_divisor; ++ unsigned hash_kind; ++/* Variables */ ++ struct timer_list perturb_timer; ++ int perturbation; ++ esfq_index tail; /* Index of current slot in round */ ++ esfq_index max_depth; /* Maximal depth */ ++ ++ esfq_index *ht; /* Hash table */ ++ esfq_index *next; /* Active slots link */ ++ short *allot; /* Current allotment per slot */ ++ unsigned short *hash; /* Hash value indexed by slots */ ++ struct sk_buff_head *qs; /* Slot queue */ ++ struct esfq_head *dep; /* Linked list of slots, indexed by depth */ ++}; ++ ++/* This contains the info we will hash. */ ++struct esfq_packet_info ++{ ++ u32 proto; /* protocol or port */ ++ u32 src; /* source from packet header */ ++ u32 dst; /* destination from packet header */ ++ u32 ctorigsrc; /* original source from conntrack */ ++ u32 ctorigdst; /* original destination from conntrack */ ++ u32 ctreplsrc; /* reply source from conntrack */ ++ u32 ctrepldst; /* reply destination from conntrack */ ++ u32 mark; /* netfilter mark (fwmark) */ ++}; ++ ++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) ++{ ++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) ++{ ++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) ++{ ++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) ++{ ++ struct esfq_packet_info info; ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ enum ip_conntrack_info ctinfo; ++ struct nf_conn *ct = nf_ct_get(skb, &ctinfo); ++#endif ++ ++ switch (skb->protocol) { ++ case __constant_htons(ETH_P_IP): ++ { ++ struct iphdr *iph = ip_hdr(skb); ++ info.dst = iph->daddr; ++ info.src = iph->saddr; ++ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && ++ (iph->protocol == IPPROTO_TCP || ++ iph->protocol == IPPROTO_UDP || ++ iph->protocol == IPPROTO_SCTP || ++ iph->protocol == IPPROTO_DCCP || ++ iph->protocol == IPPROTO_ESP)) ++ info.proto = *(((u32*)iph) + iph->ihl); ++ else ++ info.proto = iph->protocol; ++ break; ++ } ++ case __constant_htons(ETH_P_IPV6): ++ { ++ struct ipv6hdr *iph = ipv6_hdr(skb); ++ /* Hash ipv6 addresses into a u32. This isn't ideal, ++ * but the code is simple. */ ++ info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation); ++ info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation); ++ if (iph->nexthdr == IPPROTO_TCP || ++ iph->nexthdr == IPPROTO_UDP || ++ iph->nexthdr == IPPROTO_SCTP || ++ iph->nexthdr == IPPROTO_DCCP || ++ iph->nexthdr == IPPROTO_ESP) ++ info.proto = *(u32*)&iph[1]; ++ else ++ info.proto = iph->nexthdr; ++ break; ++ } ++ default: ++ info.dst = (u32)(unsigned long)skb_dst(skb); ++ info.src = (u32)(unsigned long)skb->sk; ++ info.proto = skb->protocol; ++ } ++ ++ info.mark = skb->mark; ++ ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ /* defaults if there is no conntrack info */ ++ info.ctorigsrc = info.src; ++ info.ctorigdst = info.dst; ++ info.ctreplsrc = info.dst; ++ info.ctrepldst = info.src; ++ /* collect conntrack info */ ++ if (ct && ct != &nf_conntrack_untracked) { ++ if (skb->protocol == __constant_htons(ETH_P_IP)) { ++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; ++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip; ++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; ++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip; ++ } ++ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { ++ /* Again, hash ipv6 addresses into a single u32. */ ++ info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation); ++ info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation); ++ } ++ ++ } ++#endif ++ ++ switch(q->hash_kind) { ++ case TCA_SFQ_HASH_CLASSIC: ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++ case TCA_SFQ_HASH_DST: ++ return esfq_jhash_1word(q, info.dst); ++ case TCA_SFQ_HASH_SRC: ++ return esfq_jhash_1word(q, info.src); ++ case TCA_SFQ_HASH_FWMARK: ++ return esfq_jhash_1word(q, info.mark); ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ case TCA_SFQ_HASH_CTORIGDST: ++ return esfq_jhash_1word(q, info.ctorigdst); ++ case TCA_SFQ_HASH_CTORIGSRC: ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ case TCA_SFQ_HASH_CTREPLDST: ++ return esfq_jhash_1word(q, info.ctrepldst); ++ case TCA_SFQ_HASH_CTREPLSRC: ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ case TCA_SFQ_HASH_CTNATCHG: ++ { ++ if (info.ctorigdst == info.ctreplsrc) ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ } ++#endif ++ default: ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n"); ++ } ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++} ++ ++static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d = q->qs[x].qlen + q->depth; ++ ++ p = d; ++ n = q->dep[d].next; ++ q->dep[x].next = n; ++ q->dep[x].prev = p; ++ q->dep[p].next = q->dep[n].prev = x; ++} ++ ++static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ ++ if (n == p && q->max_depth == q->qs[x].qlen + 1) ++ q->max_depth--; ++ ++ esfq_link(q, x); ++} ++ ++static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ d = q->qs[x].qlen; ++ if (q->max_depth < d) ++ q->max_depth = d; ++ ++ esfq_link(q, x); ++} ++ ++static unsigned int esfq_drop(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_index d = q->max_depth; ++ struct sk_buff *skb; ++ unsigned int len; ++ ++ /* Queue is full! Find the longest slot and ++ drop a packet from it */ ++ ++ if (d > 1) { ++ esfq_index x = q->dep[d+q->depth].next; ++ skb = q->qs[x].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[x]); ++ kfree_skb(skb); ++ esfq_dec(q, x); ++ sch->q.qlen--; ++ sch->qstats.drops++; ++ sch->qstats.backlog -= len; ++ return len; ++ } ++ ++ if (d == 1) { ++ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ ++ d = q->next[q->tail]; ++ q->next[q->tail] = q->next[d]; ++ q->allot[q->next[d]] += q->quantum; ++ skb = q->qs[d].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[d]); ++ kfree_skb(skb); ++ esfq_dec(q, d); ++ sch->q.qlen--; ++ q->ht[q->hash[d]] = q->depth; ++ sch->qstats.drops++; ++ sch->qstats.backlog -= len; ++ return len; ++ } ++ ++ return 0; ++} ++ ++static void esfq_q_enqueue(struct sk_buff *skb, struct esfq_sched_data *q, unsigned int end) ++{ ++ unsigned hash = esfq_hash(q, skb); ++ unsigned depth = q->depth; ++ esfq_index x; ++ ++ x = q->ht[hash]; ++ if (x == depth) { ++ q->ht[hash] = x = q->dep[depth].next; ++ q->hash[x] = hash; ++ } ++ ++ if (end == ESFQ_TAIL) ++ __skb_queue_tail(&q->qs[x], skb); ++ else ++ __skb_queue_head(&q->qs[x], skb); ++ ++ esfq_inc(q, x); ++ if (q->qs[x].qlen == 1) { /* The flow is new */ ++ if (q->tail == depth) { /* It is the first flow */ ++ q->tail = x; ++ q->next[x] = x; ++ q->allot[x] = q->quantum; ++ } else { ++ q->next[x] = q->next[q->tail]; ++ q->next[q->tail] = x; ++ q->tail = x; ++ } ++ } ++} ++ ++static int esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_enqueue(skb, q, ESFQ_TAIL); ++ sch->qstats.backlog += skb->len; ++ if (++sch->q.qlen < q->limit-1) { ++ sch->bstats.bytes += skb->len; ++ sch->bstats.packets++; ++ return 0; ++ } ++ ++ sch->qstats.drops++; ++ esfq_drop(sch); ++ return NET_XMIT_CN; ++} ++ ++static struct sk_buff *esfq_peek(struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_index a; ++ ++ /* No active slots */ ++ if (q->tail == q->depth) ++ return NULL; ++ ++ a = q->next[q->tail]; ++ return skb_peek(&q->qs[a]); ++} ++ ++static struct sk_buff *esfq_q_dequeue(struct esfq_sched_data *q) ++{ ++ struct sk_buff *skb; ++ unsigned depth = q->depth; ++ esfq_index a, old_a; ++ ++ /* No active slots */ ++ if (q->tail == depth) ++ return NULL; ++ ++ a = old_a = q->next[q->tail]; ++ ++ /* Grab packet */ ++ skb = __skb_dequeue(&q->qs[a]); ++ esfq_dec(q, a); ++ ++ /* Is the slot empty? */ ++ if (q->qs[a].qlen == 0) { ++ q->ht[q->hash[a]] = depth; ++ a = q->next[a]; ++ if (a == old_a) { ++ q->tail = depth; ++ return skb; ++ } ++ q->next[q->tail] = a; ++ q->allot[a] += q->quantum; ++ } else if ((q->allot[a] -= skb->len) <= 0) { ++ q->tail = a; ++ a = q->next[a]; ++ q->allot[a] += q->quantum; ++ } ++ ++ return skb; ++} ++ ++static struct sk_buff *esfq_dequeue(struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct sk_buff *skb; ++ ++ skb = esfq_q_dequeue(q); ++ if (skb == NULL) ++ return NULL; ++ sch->q.qlen--; ++ sch->qstats.backlog -= skb->len; ++ return skb; ++} ++ ++static void esfq_q_destroy(struct esfq_sched_data *q) ++{ ++ del_timer(&q->perturb_timer); ++ if(q->ht) ++ kfree(q->ht); ++ if(q->dep) ++ kfree(q->dep); ++ if(q->next) ++ kfree(q->next); ++ if(q->allot) ++ kfree(q->allot); ++ if(q->hash) ++ kfree(q->hash); ++ if(q->qs) ++ kfree(q->qs); ++} ++ ++static void esfq_destroy(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_destroy(q); ++} ++ ++ ++static void esfq_reset(struct Qdisc* sch) ++{ ++ struct sk_buff *skb; ++ ++ while ((skb = esfq_dequeue(sch)) != NULL) ++ kfree_skb(skb); ++} ++ ++static void esfq_perturbation(unsigned long arg) ++{ ++ struct Qdisc *sch = (struct Qdisc*)arg; ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ ++ q->perturbation = net_random()&0x1F; ++ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++} ++ ++static unsigned int esfq_check_hash(unsigned int kind) ++{ ++ switch (kind) { ++ case TCA_SFQ_HASH_CTORIGDST: ++ case TCA_SFQ_HASH_CTORIGSRC: ++ case TCA_SFQ_HASH_CTREPLDST: ++ case TCA_SFQ_HASH_CTREPLSRC: ++ case TCA_SFQ_HASH_CTNATCHG: ++#ifndef CONFIG_NET_SCH_ESFQ_NFCT ++ { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Conntrack hash types disabled in kernel config. Falling back to classic.\n"); ++ return TCA_SFQ_HASH_CLASSIC; ++ } ++#endif ++ case TCA_SFQ_HASH_CLASSIC: ++ case TCA_SFQ_HASH_DST: ++ case TCA_SFQ_HASH_SRC: ++ case TCA_SFQ_HASH_FWMARK: ++ return kind; ++ default: ++ { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash type. Falling back to classic.\n"); ++ return TCA_SFQ_HASH_CLASSIC; ++ } ++ } ++} ++ ++static int esfq_q_init(struct esfq_sched_data *q, struct nlattr *opt) ++{ ++ struct tc_esfq_qopt *ctl = nla_data(opt); ++ esfq_index p = ~0U/2; ++ int i; ++ ++ if (opt && opt->nla_len < nla_attr_size(sizeof(*ctl))) ++ return -EINVAL; ++ ++ q->perturbation = 0; ++ q->hash_kind = TCA_SFQ_HASH_CLASSIC; ++ q->max_depth = 0; ++ if (opt == NULL) { ++ q->perturb_period = 0; ++ q->hash_divisor = 1024; ++ q->tail = q->limit = q->depth = 128; ++ ++ } else { ++ struct tc_esfq_qopt *ctl = nla_data(opt); ++ if (ctl->quantum) ++ q->quantum = ctl->quantum; ++ q->perturb_period = ctl->perturb_period*HZ; ++ q->hash_divisor = ctl->divisor ? : 1024; ++ q->tail = q->limit = q->depth = ctl->flows ? : 128; ++ ++ if ( q->depth > p - 1 ) ++ return -EINVAL; ++ ++ if (ctl->limit) ++ q->limit = min_t(u32, ctl->limit, q->depth); ++ ++ if (ctl->hash_kind) { ++ q->hash_kind = esfq_check_hash(ctl->hash_kind); ++ } ++ } ++ ++ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->ht) ++ goto err_case; ++ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); ++ if (!q->dep) ++ goto err_case; ++ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->next) ++ goto err_case; ++ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); ++ if (!q->allot) ++ goto err_case; ++ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); ++ if (!q->hash) ++ goto err_case; ++ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); ++ if (!q->qs) ++ goto err_case; ++ ++ for (i=0; i< q->hash_divisor; i++) ++ q->ht[i] = q->depth; ++ for (i=0; i<q->depth; i++) { ++ skb_queue_head_init(&q->qs[i]); ++ q->dep[i+q->depth].next = i+q->depth; ++ q->dep[i+q->depth].prev = i+q->depth; ++ } ++ ++ for (i=0; i<q->depth; i++) ++ esfq_link(q, i); ++ return 0; ++err_case: ++ esfq_q_destroy(q); ++ return -ENOBUFS; ++} ++ ++static int esfq_init(struct Qdisc *sch, struct nlattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ int err; ++ ++ q->quantum = psched_mtu(qdisc_dev(sch)); /* default */ ++ if ((err = esfq_q_init(q, opt))) ++ return err; ++ ++ init_timer(&q->perturb_timer); ++ q->perturb_timer.data = (unsigned long)sch; ++ q->perturb_timer.function = esfq_perturbation; ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++ ++ return 0; ++} ++ ++static int esfq_change(struct Qdisc *sch, struct nlattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct esfq_sched_data new; ++ struct sk_buff *skb; ++ int err; ++ ++ /* set up new queue */ ++ memset(&new, 0, sizeof(struct esfq_sched_data)); ++ new.quantum = psched_mtu(qdisc_dev(sch)); /* default */ ++ if ((err = esfq_q_init(&new, opt))) ++ return err; ++ ++ /* copy all packets from the old queue to the new queue */ ++ sch_tree_lock(sch); ++ while ((skb = esfq_q_dequeue(q)) != NULL) ++ esfq_q_enqueue(skb, &new, ESFQ_TAIL); ++ ++ /* clean up the old queue */ ++ esfq_q_destroy(q); ++ ++ /* copy elements of the new queue into the old queue */ ++ q->perturb_period = new.perturb_period; ++ q->quantum = new.quantum; ++ q->limit = new.limit; ++ q->depth = new.depth; ++ q->hash_divisor = new.hash_divisor; ++ q->hash_kind = new.hash_kind; ++ q->tail = new.tail; ++ q->max_depth = new.max_depth; ++ q->ht = new.ht; ++ q->dep = new.dep; ++ q->next = new.next; ++ q->allot = new.allot; ++ q->hash = new.hash; ++ q->qs = new.qs; ++ ++ /* finish up */ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } else { ++ q->perturbation = 0; ++ } ++ sch_tree_unlock(sch); ++ return 0; ++} ++ ++static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ unsigned char *b = skb_tail_pointer(skb); ++ struct tc_esfq_qopt opt; ++ ++ opt.quantum = q->quantum; ++ opt.perturb_period = q->perturb_period/HZ; ++ ++ opt.limit = q->limit; ++ opt.divisor = q->hash_divisor; ++ opt.flows = q->depth; ++ opt.hash_kind = q->hash_kind; ++ ++ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); ++ ++ return skb->len; ++ ++nla_put_failure: ++ nlmsg_trim(skb, b); ++ return -1; ++} ++ ++static struct Qdisc_ops esfq_qdisc_ops = ++{ ++ .next = NULL, ++ .cl_ops = NULL, ++ .id = "esfq", ++ .priv_size = sizeof(struct esfq_sched_data), ++ .enqueue = esfq_enqueue, ++ .dequeue = esfq_dequeue, ++ .peek = esfq_peek, ++ .drop = esfq_drop, ++ .init = esfq_init, ++ .reset = esfq_reset, ++ .destroy = esfq_destroy, ++ .change = esfq_change, ++ .dump = esfq_dump, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init esfq_module_init(void) ++{ ++ return register_qdisc(&esfq_qdisc_ops); ++} ++static void __exit esfq_module_exit(void) ++{ ++ unregister_qdisc(&esfq_qdisc_ops); ++} ++module_init(esfq_module_init) ++module_exit(esfq_module_exit) ++MODULE_LICENSE("GPL"); + |
#vim 150-netfilter_imq.patch
--- /dev/null 2010-02-26 12:46:03.368409969 +0000 +++ 150-netfilter_imq.patch 2010-02-26 15:13:26.000000000 +0000 @@ -0,0 +1,1337 @@ +--- /dev/null ++++ b/drivers/net/imq.c +@@ -0,0 +1,632 @@ ++/* ++ * Pseudo-driver for the intermediate queue device. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Patrick McHardy, <kaber@trash.net> ++ * ++ * The first version was written by Martin Devera, <devik@cdi.cz> ++ * ++ * Credits: Jan Rafaj <imq2t@cedric.vabo.cz> ++ * - Update patch to 2.4.21 ++ * Sebastian Strollo <sstrollo@nortelnetworks.com> ++ * - Fix "Dead-loop on netdevice imq"-issue ++ * Marcel Sebek <sebek64@post.cz> ++ * - Update to 2.6.2-rc1 ++ * ++ * After some time of inactivity there is a group taking care ++ * of IMQ again: http://www.linuximq.net ++ * ++ * ++ * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7 ++ * including the following changes: ++ * ++ * - Correction of ipv6 support "+"s issue (Hasso Tepper) ++ * - Correction of imq_init_devs() issue that resulted in ++ * kernel OOPS unloading IMQ as module (Norbert Buchmuller) ++ * - Addition of functionality to choose number of IMQ devices ++ * during kernel config (Andre Correa) ++ * - Addition of functionality to choose how IMQ hooks on ++ * PRE and POSTROUTING (after or before NAT) (Andre Correa) ++ * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa) ++ * ++ * ++ * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were ++ * released with almost no problems. 2.6.14-x was released ++ * with some important changes: nfcache was removed; After ++ * some weeks of trouble we figured out that some IMQ fields ++ * in skb were missing in skbuff.c - skb_clone and copy_skb_header. ++ * These functions are correctly patched by this new patch version. ++ * ++ * Thanks for all who helped to figure out all the problems with ++ * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX, ++ * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully ++ * I didn't forget anybody). I apologize again for my lack of time. ++ * ++ * ++ * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead ++ * of qdisc_restart() and moved qdisc_run() to tasklet to avoid ++ * recursive locking. New initialization routines to fix 'rmmod' not ++ * working anymore. Used code from ifb.c. (Jussi Kivilinna) ++ * ++ * 2008/08/06 - 2.6.26 - (JK) ++ * - Replaced tasklet with 'netif_schedule()'. ++ * - Cleaned up and added comments for imq_nf_queue(). ++ * ++ * 2009/04/12 ++ * - Add skb_save_cb/skb_restore_cb helper functions for backuping ++ * control buffer. This is needed because qdisc-layer on kernels ++ * 2.6.27 and newer overwrite control buffer. (Jussi Kivilinna) ++ * - Add better locking for IMQ device. Hopefully this will solve ++ * SMP issues. (Jussi Kivilinna) ++ * - Port to 2.6.27 ++ * - Port to 2.6.28 ++ * - Port to 2.6.29 + fix rmmod not working ++ * ++ * 2009/04/20 - (Jussi Kivilinna) ++ * - Use netdevice feature flags to avoid extra packet handling ++ * by core networking layer and possibly increase performance. ++ * ++ * 2009/09/26 - (Jussi Kivilinna) ++ * - Add imq_nf_reinject_lockless to fix deadlock with ++ * imq_nf_queue/imq_nf_reinject. ++ * ++ * 2009/12/08 - (Jussi Kivilinna) ++ * - Port to 2.6.32 ++ * - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit() ++ * - Also add better error checking for skb->nf_queue_entry usage ++ * ++ * Also, many thanks to pablo Sebastian Greco for making the initial ++ * patch and to those who helped the testing. ++ * ++ * More info at: http://www.linuximq.net/ (Andre Correa) ++ */ ++ ++#include <linux/module.h> ++#include <linux/kernel.h> ++#include <linux/moduleparam.h> ++#include <linux/list.h> ++#include <linux/skbuff.h> ++#include <linux/netdevice.h> ++#include <linux/etherdevice.h> ++#include <linux/rtnetlink.h> ++#include <linux/if_arp.h> ++#include <linux/netfilter.h> ++#include <linux/netfilter_ipv4.h> ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ #include <linux/netfilter_ipv6.h> ++#endif ++#include <linux/imq.h> ++#include <net/pkt_sched.h> ++#include <net/netfilter/nf_queue.h> ++ ++static nf_hookfn imq_nf_hook; ++ ++static struct nf_hook_ops imq_ingress_ipv4 = { ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET, ++ .hooknum = NF_INET_PRE_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ .priority = NF_IP_PRI_MANGLE + 1 ++#else ++ .priority = NF_IP_PRI_NAT_DST + 1 ++#endif ++}; ++ ++static struct nf_hook_ops imq_egress_ipv4 = { ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET, ++ .hooknum = NF_INET_POST_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA) ++ .priority = NF_IP_PRI_LAST ++#else ++ .priority = NF_IP_PRI_NAT_SRC - 1 ++#endif ++}; ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++static struct nf_hook_ops imq_ingress_ipv6 = { ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET6, ++ .hooknum = NF_INET_PRE_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ .priority = NF_IP6_PRI_MANGLE + 1 ++#else ++ .priority = NF_IP6_PRI_NAT_DST + 1 ++#endif ++}; ++ ++static struct nf_hook_ops imq_egress_ipv6 = { ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET6, ++ .hooknum = NF_INET_POST_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA) ++ .priority = NF_IP6_PRI_LAST ++#else ++ .priority = NF_IP6_PRI_NAT_SRC - 1 ++#endif ++}; ++#endif ++ ++#if defined(CONFIG_IMQ_NUM_DEVS) ++static unsigned int numdevs = CONFIG_IMQ_NUM_DEVS; ++#else ++static unsigned int numdevs = IMQ_MAX_DEVS; ++#endif ++ ++static DEFINE_SPINLOCK(imq_nf_queue_lock); ++ ++static struct net_device *imq_devs_cache[IMQ_MAX_DEVS]; ++ ++ ++static struct net_device_stats *imq_get_stats(struct net_device *dev) ++{ ++ return &dev->stats; ++} ++ ++/* called for packets kfree'd in qdiscs at places other than enqueue */ ++static void imq_skb_destructor(struct sk_buff *skb) ++{ ++ struct nf_queue_entry *entry = skb->nf_queue_entry; ++ ++ skb->nf_queue_entry = NULL; ++ ++ if (entry) { ++ nf_queue_entry_release_refs(entry); ++ kfree(entry); ++ } ++ ++ skb_restore_cb(skb); /* kfree backup */ ++} ++ ++/* locking not needed when called from imq_nf_queue */ ++static void imq_nf_reinject_lockless(struct nf_queue_entry *entry, ++ unsigned int verdict) ++{ ++ int status; ++ ++ if (!entry->next_outfn) { ++ nf_reinject(entry, verdict); ++ return; ++ } ++ ++ status = entry->next_outfn(entry, entry->next_queuenum); ++ if (status < 0) { ++ nf_queue_entry_release_refs(entry); ++ kfree_skb(entry->skb); ++ kfree(entry); ++ } ++} ++ ++static void imq_nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) ++{ ++ int status; ++ ++ if (!entry->next_outfn) { ++ spin_lock_bh(&imq_nf_queue_lock); ++ nf_reinject(entry, verdict); ++ spin_unlock_bh(&imq_nf_queue_lock); ++ return; ++ } ++ ++ rcu_read_lock(); ++ local_bh_disable(); ++ status = entry->next_outfn(entry, entry->next_queuenum); ++ local_bh_enable(); ++ if (status < 0) { ++ nf_queue_entry_release_refs(entry); ++ kfree_skb(entry->skb); ++ kfree(entry); ++ } ++ ++ rcu_read_unlock(); ++} ++ ++static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct nf_queue_entry *entry = skb->nf_queue_entry; ++ ++ skb->nf_queue_entry = NULL; ++ dev->trans_start = jiffies; ++ ++ dev->stats.tx_bytes += skb->len; ++ dev->stats.tx_packets++; ++ ++ if (entry == NULL) { ++ /* We don't know what is going on here.. packet is queued for ++ * imq device, but (probably) not by us. ++ * ++ * If this packet was not send here by imq_nf_queue(), then ++ * skb_save_cb() was not used and skb_free() should not show: ++ * WARNING: IMQ: kfree_skb: skb->cb_next:.. ++ * and/or ++ * WARNING: IMQ: kfree_skb: skb->nf_queue_entry... ++ * ++ * However if this message is shown, then IMQ is somehow broken ++ * and you should report this to linuximq.net. ++ */ ++ ++ /* imq_dev_xmit is black hole that eats all packets, report that ++ * we eat this packet happily and increase dropped counters. ++ */ ++ ++ dev->stats.tx_dropped++; ++ dev_kfree_skb(skb); ++ ++ return NETDEV_TX_OK; ++ } ++ ++ skb_restore_cb(skb); /* restore skb->cb */ ++ ++ skb->imq_flags = 0; ++ skb->destructor = NULL; ++ ++ imq_nf_reinject(entry, NF_ACCEPT); ++ ++ return NETDEV_TX_OK; ++} ++ ++static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num) ++{ ++ struct net_device *dev; ++ struct sk_buff *skb_orig, *skb, *skb_shared; ++ struct Qdisc *q; ++ struct netdev_queue *txq; ++ int users, index; ++ int retval = -EINVAL; ++ ++ index = entry->skb->imq_flags & IMQ_F_IFMASK; ++ if (unlikely(index > numdevs - 1)) { ++ if (net_ratelimit()) ++ printk(KERN_WARNING ++ "IMQ: invalid device specified, highest is %u\n", ++ numdevs - 1); ++ retval = -EINVAL; ++ goto out; ++ } ++ ++ /* check for imq device by index from cache */ ++ dev = imq_devs_cache[index]; ++ if (unlikely(!dev)) { ++ char buf[8]; ++ ++ /* get device by name and cache result */ ++ snprintf(buf, sizeof(buf), "imq%d", index); ++ dev = dev_get_by_name(&init_net, buf); ++ if (!dev) { ++ /* not found ?!*/ ++ BUG(); ++ retval = -ENODEV; ++ goto out; ++ } ++ ++ imq_devs_cache[index] = dev; ++ dev_put(dev); ++ } ++ ++ if (unlikely(!(dev->flags & IFF_UP))) { ++ entry->skb->imq_flags = 0; ++ imq_nf_reinject_lockless(entry, NF_ACCEPT); ++ retval = 0; ++ goto out; ++ } ++ dev->last_rx = jiffies; ++ ++ skb = entry->skb; ++ skb_orig = NULL; ++ ++ /* skb has owner? => make clone */ ++ if (unlikely(skb->destructor)) { ++ skb_orig = skb; ++ skb = skb_clone(skb, GFP_ATOMIC); ++ if (!skb) { ++ retval = -ENOMEM; ++ goto out; ++ } ++ entry->skb = skb; ++ } ++ ++ skb->nf_queue_entry = entry; ++ ++ dev->stats.rx_bytes += skb->len; ++ dev->stats.rx_packets++; ++ ++ txq = dev_pick_tx(dev, skb); ++ ++ q = rcu_dereference(txq->qdisc); ++ if (unlikely(!q->enqueue)) ++ goto packet_not_eaten_by_imq_dev; ++ ++ spin_lock_bh(qdisc_lock(q)); ++ ++ users = atomic_read(&skb->users); ++ ++ skb_shared = skb_get(skb); /* increase reference count by one */ ++ skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will ++ overwrite it */ ++ qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */ ++ ++ if (likely(atomic_read(&skb_shared->users) == users + 1)) { ++ kfree_skb(skb_shared); /* decrease reference count by one */ ++ ++ skb->destructor = &imq_skb_destructor; ++ ++ /* cloned? */ ++ if (skb_orig) ++ kfree_skb(skb_orig); /* free original */ ++ ++ spin_unlock_bh(qdisc_lock(q)); ++ ++ /* schedule qdisc dequeue */ ++ __netif_schedule(q); ++ ++ retval = 0; ++ goto out; ++ } else { ++ skb_restore_cb(skb_shared); /* restore skb->cb */ ++ skb->nf_queue_entry = NULL; ++ /* qdisc dropped packet and decreased skb reference count of ++ * skb, so we don't really want to and try refree as that would ++ * actually destroy the skb. */ ++ spin_unlock_bh(qdisc_lock(q)); ++ goto packet_not_eaten_by_imq_dev; ++ } ++ ++packet_not_eaten_by_imq_dev: ++ /* cloned? restore original */ ++ if (skb_orig) { ++ kfree_skb(skb); ++ entry->skb = skb_orig; ++ } ++ retval = -1; ++out: ++ return retval; ++} ++ ++static struct nf_queue_handler nfqh = { ++ .name = "imq", ++ .outfn = imq_nf_queue, ++}; ++ ++static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb, ++ const struct net_device *indev, ++ const struct net_device *outdev, ++ int (*okfn)(struct sk_buff *)) ++{ ++ if (pskb->imq_flags & IMQ_F_ENQUEUE) ++ return NF_QUEUE; ++ ++ return NF_ACCEPT; ++} ++ ++static int imq_close(struct net_device *dev) ++{ ++ netif_stop_queue(dev); ++ return 0; ++} ++ ++static int imq_open(struct net_device *dev) ++{ ++ netif_start_queue(dev); ++ return 0; ++} ++ ++static const struct net_device_ops imq_netdev_ops = { ++ .ndo_open = imq_open, ++ .ndo_stop = imq_close, ++ .ndo_start_xmit = imq_dev_xmit, ++ .ndo_get_stats = imq_get_stats, ++}; ++ ++static void imq_setup(struct net_device *dev) ++{ ++ dev->netdev_ops = &imq_netdev_ops; ++ dev->type = ARPHRD_VOID; ++ dev->mtu = 16000; ++ dev->tx_queue_len = 11000; ++ dev->flags = IFF_NOARP; ++ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | ++ NETIF_F_GSO | NETIF_F_HW_CSUM | ++ NETIF_F_HIGHDMA; ++ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; ++} ++ ++static int imq_validate(struct nlattr *tb[], struct nlattr *data[]) ++{ ++ int ret = 0; ++ ++ if (tb[IFLA_ADDRESS]) { ++ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { ++ ret = -EINVAL; ++ goto end; ++ } ++ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { ++ ret = -EADDRNOTAVAIL; ++ goto end; ++ } ++ } ++ return 0; ++end: ++ printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret); ++ return ret; ++} ++ ++static struct rtnl_link_ops imq_link_ops __read_mostly = { ++ .kind = "imq", ++ .priv_size = 0, ++ .setup = imq_setup, ++ .validate = imq_validate, ++}; ++ ++static int __init imq_init_hooks(void) ++{ ++ int err; ++ ++ nf_register_queue_imq_handler(&nfqh); ++ ++ err = nf_register_hook(&imq_ingress_ipv4); ++ if (err) ++ goto err1; ++ ++ err = nf_register_hook(&imq_egress_ipv4); ++ if (err) ++ goto err2; ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ err = nf_register_hook(&imq_ingress_ipv6); ++ if (err) ++ goto err3; ++ ++ err = nf_register_hook(&imq_egress_ipv6); ++ if (err) ++ goto err4; ++#endif ++ ++ return 0; ++ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++err4: ++ nf_unregister_hook(&imq_ingress_ipv6); ++err3: ++ nf_unregister_hook(&imq_egress_ipv4); ++#endif ++err2: ++ nf_unregister_hook(&imq_ingress_ipv4); ++err1: ++ nf_unregister_queue_imq_handler(); ++ return err; ++} ++ ++static int __init imq_init_one(int index) ++{ ++ struct net_device *dev; ++ int ret; ++ ++ dev = alloc_netdev(0, "imq%d", imq_setup); ++ if (!dev) ++ return -ENOMEM; ++ ++ ret = dev_alloc_name(dev, dev->name); ++ if (ret < 0) ++ goto fail; ++ ++ dev->rtnl_link_ops = &imq_link_ops; ++ ret = register_netdevice(dev); ++ if (ret < 0) ++ goto fail; ++ ++ return 0; ++fail: ++ free_netdev(dev); ++ return ret; ++} ++ ++static int __init imq_init_devs(void) ++{ ++ int err, i; ++ ++ if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) { ++ printk(KERN_ERR "IMQ: numdevs has to be betweed 1 and %u\n", ++ IMQ_MAX_DEVS); ++ return -EINVAL; ++ } ++ ++ rtnl_lock(); ++ err = __rtnl_link_register(&imq_link_ops); ++ ++ for (i = 0; i < numdevs && !err; i++) ++ err = imq_init_one(i); ++ ++ if (err) { ++ __rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++ } ++ rtnl_unlock(); ++ ++ return err; ++} ++ ++static int __init imq_init_module(void) ++{ ++ int err; ++ ++#if defined(CONFIG_IMQ_NUM_DEVS) ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16); ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2); ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK); ++#endif ++ ++ err = imq_init_devs(); ++ if (err) { ++ printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n"); ++ return err; ++ } ++ ++ err = imq_init_hooks(); ++ if (err) { ++ printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n"); ++ rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++ return err; ++ } ++ ++ printk(KERN_INFO "IMQ driver loaded successfully.\n"); ++ ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n"); ++#else ++ printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n"); ++#endif ++#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n"); ++#else ++ printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n"); ++#endif ++ ++ return 0; ++} ++ ++static void __exit imq_unhook(void) ++{ ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ nf_unregister_hook(&imq_ingress_ipv6); ++ nf_unregister_hook(&imq_egress_ipv6); ++#endif ++ nf_unregister_hook(&imq_ingress_ipv4); ++ nf_unregister_hook(&imq_egress_ipv4); ++ ++ nf_unregister_queue_imq_handler(); ++} ++ ++static void __exit imq_cleanup_devs(void) ++{ ++ rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++} ++ ++static void __exit imq_exit_module(void) ++{ ++ imq_unhook(); ++ imq_cleanup_devs(); ++ printk(KERN_INFO "IMQ driver unloaded successfully.\n"); ++} ++ ++module_init(imq_init_module); ++module_exit(imq_exit_module); ++ ++module_param(numdevs, int, 0); ++MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will " ++ "be created)"); ++MODULE_AUTHOR("http://www.linuximq.net"); ++MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See " ++ "http://www.linuximq.net/ for more information."); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_RTNL_LINK("imq"); ++ +--- a/drivers/net/Kconfig ++++ b/drivers/net/Kconfig +@@ -109,6 +109,129 @@ config EQUALIZER + To compile this driver as a module, choose M here: the module + will be called eql. If unsure, say N. + ++config IMQ ++ tristate "IMQ (intermediate queueing device) support" ++ depends on NETDEVICES && NETFILTER ++ ---help--- ++ The IMQ device(s) is used as placeholder for QoS queueing ++ disciplines. Every packet entering/leaving the IP stack can be ++ directed through the IMQ device where it's enqueued/dequeued to the ++ attached qdisc. This allows you to treat network devices as classes ++ and distribute bandwidth among them. Iptables is used to specify ++ through which IMQ device, if any, packets travel. ++ ++ More information at: http://www.linuximq.net/ ++ ++ To compile this driver as a module, choose M here: the module ++ will be called imq. If unsure, say N. ++ ++choice ++ prompt "IMQ behavior (PRE/POSTROUTING)" ++ depends on IMQ ++ default IMQ_BEHAVIOR_AB ++ help ++ ++ This settings defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ IMQ can work in any of the following ways: ++ ++ PREROUTING | POSTROUTING ++ -----------------|------------------- ++ #1 After NAT | After NAT ++ #2 After NAT | Before NAT ++ #3 Before NAT | After NAT ++ #4 Before NAT | Before NAT ++ ++ The default behavior is to hook before NAT on PREROUTING ++ and after NAT on POSTROUTING (#3). ++ ++ This settings are specially usefull when trying to use IMQ ++ to shape NATed clients. ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_AA ++ bool "IMQ AA" ++ help ++ This settings defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: After NAT ++ POSTROUTING: After NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_AB ++ bool "IMQ AB" ++ help ++ This settings defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: After NAT ++ POSTROUTING: Before NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_BA ++ bool "IMQ BA" ++ help ++ This settings defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: Before NAT ++ POSTROUTING: After NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_BB ++ bool "IMQ BB" ++ help ++ This settings defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: Before NAT ++ POSTROUTING: Before NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++endchoice ++ ++config IMQ_NUM_DEVS ++ ++ int "Number of IMQ devices" ++ range 2 16 ++ depends on IMQ ++ default "16" ++ help ++ ++ This settings defines how many IMQ devices will be ++ created. ++ ++ The default value is 16. ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ + config TUN + tristate "Universal TUN/TAP device driver support" + select CRC32 +--- a/drivers/net/Makefile ++++ b/drivers/net/Makefile +@@ -165,6 +165,7 @@ obj-$(CONFIG_SLHC) += slhc.o + obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o + + obj-$(CONFIG_DUMMY) += dummy.o ++obj-$(CONFIG_IMQ) += imq.o + obj-$(CONFIG_IFB) += ifb.o + obj-$(CONFIG_MACVLAN) += macvlan.o + obj-$(CONFIG_DE600) += de600.o +--- /dev/null ++++ b/include/linux/imq.h +@@ -0,0 +1,13 @@ ++#ifndef _IMQ_H ++#define _IMQ_H ++ ++/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */ ++#define IMQ_F_BITS 5 ++ ++#define IMQ_F_IFMASK 0x0f ++#define IMQ_F_ENQUEUE 0x10 ++ ++#define IMQ_MAX_DEVS (IMQ_F_IFMASK + 1) ++ ++#endif /* _IMQ_H */ ++ +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1114,6 +1114,7 @@ extern int dev_alloc_name(struct net_de + extern int dev_open(struct net_device *dev); + extern int dev_close(struct net_device *dev); + extern void dev_disable_lro(struct net_device *dev); ++extern struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb); + extern int dev_queue_xmit(struct sk_buff *skb); + extern int register_netdevice(struct net_device *dev); + extern void unregister_netdevice(struct net_device *dev); +--- /dev/null ++++ b/include/linux/netfilter/xt_IMQ.h +@@ -0,0 +1,9 @@ ++#ifndef _XT_IMQ_H ++#define _XT_IMQ_H ++ ++struct xt_imq_info { ++ unsigned int todev; /* target imq device */ ++}; ++ ++#endif /* _XT_IMQ_H */ ++ +--- /dev/null ++++ b/include/linux/netfilter_ipv4/ipt_IMQ.h +@@ -0,0 +1,10 @@ ++#ifndef _IPT_IMQ_H ++#define _IPT_IMQ_H ++ ++/* Backwards compatibility for old userspace */ ++#include <linux/netfilter/xt_IMQ.h> ++ ++#define ipt_imq_info xt_imq_info ++ ++#endif /* _IPT_IMQ_H */ ++ +--- /dev/null ++++ b/include/linux/netfilter_ipv6/ip6t_IMQ.h +@@ -0,0 +1,10 @@ ++#ifndef _IP6T_IMQ_H ++#define _IP6T_IMQ_H ++ ++/* Backwards compatibility for old userspace */ ++#include <linux/netfilter/xt_IMQ.h> ++ ++#define ip6t_imq_info xt_imq_info ++ ++#endif /* _IP6T_IMQ_H */ ++ +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -29,6 +29,9 @@ + #include <linux/rcupdate.h> + #include <linux/dmaengine.h> + #include <linux/hrtimer.h> ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++#include <linux/imq.h> ++#endif + + /* Don't change this without changing skb_csum_unnecessary! */ + #define CHECKSUM_NONE 0 +@@ -330,6 +333,9 @@ struct sk_buff { + * first. This is owned by whoever has the skb queued ATM. + */ + char cb[48]; ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ void *cb_next; ++#endif + + unsigned int len, + data_len; +@@ -362,6 +368,9 @@ struct sk_buff { + struct nf_conntrack *nfct; + struct sk_buff *nfct_reasm; + #endif ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ struct nf_queue_entry *nf_queue_entry; ++#endif + #ifdef CONFIG_BRIDGE_NETFILTER + struct nf_bridge_info *nf_bridge; + #endif +@@ -383,6 +392,10 @@ struct sk_buff { + + /* 0/14 bit hole */ + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ __u8 imq_flags:IMQ_F_BITS; ++#endif ++ + #ifdef CONFIG_NET_DMA + dma_cookie_t dma_cookie; + #endif +@@ -437,6 +450,12 @@ static inline struct rtable *skb_rtable( + return (struct rtable *)skb_dst(skb); + } + ++ ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++extern int skb_save_cb(struct sk_buff *skb); ++extern int skb_restore_cb(struct sk_buff *skb); ++#endif ++ + extern void kfree_skb(struct sk_buff *skb); + extern void consume_skb(struct sk_buff *skb); + extern void __kfree_skb(struct sk_buff *skb); +@@ -1972,6 +1991,10 @@ static inline void __nf_copy(struct sk_b + dst->nfct_reasm = src->nfct_reasm; + nf_conntrack_get_reasm(src->nfct_reasm); + #endif ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ dst->imq_flags = src->imq_flags; ++ dst->nf_queue_entry = src->nf_queue_entry; ++#endif + #ifdef CONFIG_BRIDGE_NETFILTER + dst->nf_bridge = src->nf_bridge; + nf_bridge_get(src->nf_bridge); +--- a/include/net/netfilter/nf_queue.h ++++ b/include/net/netfilter/nf_queue.h +@@ -13,6 +13,12 @@ struct nf_queue_entry { + struct net_device *indev; + struct net_device *outdev; + int (*okfn)(struct sk_buff *); ++ ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ int (*next_outfn)(struct nf_queue_entry *entry, ++ unsigned int queuenum); ++ unsigned int next_queuenum; ++#endif + }; + + #define nf_queue_entry_reroute(x) ((void *)x + sizeof(struct nf_queue_entry)) +@@ -30,5 +36,11 @@ extern int nf_unregister_queue_handler(u + const struct nf_queue_handler *qh); + extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh); + extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); ++extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry); ++ ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh); ++extern void nf_unregister_queue_imq_handler(void); ++#endif + + #endif /* _NF_QUEUE_H */ +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -96,6 +96,9 @@ + #include <net/net_namespace.h> + #include <net/sock.h> + #include <linux/rtnetlink.h> ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++#include <linux/imq.h> ++#endif + #include <linux/proc_fs.h> + #include <linux/seq_file.h> + #include <linux/stat.h> +@@ -1704,7 +1707,11 @@ int dev_hard_start_xmit(struct sk_buff * + int rc; + + if (likely(!skb->next)) { +- if (!list_empty(&ptype_all)) ++ if (!list_empty(&ptype_all) ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ && !(skb->imq_flags & IMQ_F_ENQUEUE) ++#endif ++ ) + dev_queue_xmit_nit(skb, dev); + + if (netif_needs_gso(dev, skb)) { +@@ -1789,8 +1796,7 @@ u16 skb_tx_hash(const struct net_device + } + EXPORT_SYMBOL(skb_tx_hash); + +-static struct netdev_queue *dev_pick_tx(struct net_device *dev, +- struct sk_buff *skb) ++struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) + { + const struct net_device_ops *ops = dev->netdev_ops; + u16 queue_index = 0; +@@ -1803,6 +1809,7 @@ static struct netdev_queue *dev_pick_tx( + skb_set_queue_mapping(skb, queue_index); + return netdev_get_tx_queue(dev, queue_index); + } ++EXPORT_SYMBOL(dev_pick_tx); + + static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, + struct net_device *dev, +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -72,6 +72,9 @@ + + static struct kmem_cache *skbuff_head_cache __read_mostly; + static struct kmem_cache *skbuff_fclone_cache __read_mostly; ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++static struct kmem_cache *skbuff_cb_store_cache __read_mostly; ++#endif + + static void sock_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +@@ -91,6 +94,83 @@ static int sock_pipe_buf_steal(struct pi + return 1; + } + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++/* Control buffer save/restore for IMQ devices */ ++struct skb_cb_table { ++ void *cb_next; ++ atomic_t refcnt; ++ char cb[48]; ++}; ++ ++static DEFINE_SPINLOCK(skb_cb_store_lock); ++ ++int skb_save_cb(struct sk_buff *skb) ++{ ++ struct skb_cb_table *next; ++ ++ next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC); ++ if (!next) ++ return -ENOMEM; ++ ++ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb)); ++ ++ memcpy(next->cb, skb->cb, sizeof(skb->cb)); ++ next->cb_next = skb->cb_next; ++ ++ atomic_set(&next->refcnt, 1); ++ ++ skb->cb_next = next; ++ return 0; ++} ++EXPORT_SYMBOL(skb_save_cb); ++ ++int skb_restore_cb(struct sk_buff *skb) ++{ ++ struct skb_cb_table *next; ++ ++ if (!skb->cb_next) ++ return 0; ++ ++ next = skb->cb_next; ++ ++ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb)); ++ ++ memcpy(skb->cb, next->cb, sizeof(skb->cb)); ++ skb->cb_next = next->cb_next; ++ ++ spin_lock(&skb_cb_store_lock); ++ ++ if (atomic_dec_and_test(&next->refcnt)) { ++ kmem_cache_free(skbuff_cb_store_cache, next); ++ } ++ ++ spin_unlock(&skb_cb_store_lock); ++ ++ return 0; ++} ++EXPORT_SYMBOL(skb_restore_cb); ++ ++static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old) ++{ ++ struct skb_cb_table *next; ++ struct sk_buff *old; ++ ++ if (!__old->cb_next) { ++ new->cb_next = NULL; ++ return; ++ } ++ ++ spin_lock(&skb_cb_store_lock); ++ ++ old = (struct sk_buff *)__old; ++ ++ next = old->cb_next; ++ atomic_inc(&next->refcnt); ++ new->cb_next = next; ++ ++ spin_unlock(&skb_cb_store_lock); ++} ++#endif + + /* Pipe buffer operations for a socket. */ + static struct pipe_buf_operations sock_pipe_buf_ops = { +@@ -398,6 +478,26 @@ static void skb_release_head_state(struc + WARN_ON(in_irq()); + skb->destructor(skb); + } ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ /* This should not happen. When it does, avoid memleak by restoring ++ the chain of cb-backups. */ ++ while(skb->cb_next != NULL) { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "IMQ: kfree_skb: skb->cb_next: " ++ "%08x\n", (unsigned int)skb->cb_next); ++ ++ skb_restore_cb(skb); ++ } ++ /* This should not happen either, nf_queue_entry is nullified in ++ * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are ++ * leaking entry pointers, maybe memory. We don't know if this is ++ * pointer to already freed memory, or should this be freed. ++ * If this happens we need to add refcounting, etc for nf_queue_entry. ++ */ ++ if (skb->nf_queue_entry && net_ratelimit()) ++ printk(KERN_WARNING ++ "IMQ: kfree_skb: skb->nf_queue_entry != NULL"); ++#endif + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + nf_conntrack_put(skb->nfct); + nf_conntrack_put_reasm(skb->nfct_reasm); +@@ -535,6 +635,9 @@ static void __copy_skb_header(struct sk_ + new->sp = secpath_get(old->sp); + #endif + memcpy(new->cb, old->cb, sizeof(old->cb)); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ skb_copy_stored_cb(new, old); ++#endif + new->csum = old->csum; + new->local_df = old->local_df; + new->pkt_type = old->pkt_type; +@@ -2776,6 +2879,13 @@ void __init skb_init(void) + 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache", ++ sizeof(struct skb_cb_table), ++ 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC, ++ NULL); ++#endif + } + + /** +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -396,6 +396,18 @@ config NETFILTER_XT_TARGET_LED + For more information on the LEDs available on your system, see + Documentation/leds-class.txt + ++config NETFILTER_XT_TARGET_IMQ ++ tristate '"IMQ" target support' ++ depends on NETFILTER_XTABLES ++ depends on IP_NF_MANGLE || IP6_NF_MANGLE ++ select IMQ ++ default m if NETFILTER_ADVANCED=n ++ help ++ This option adds a `IMQ' target which is used to specify if and ++ to which imq device packets should get enqueued/dequeued. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ + config NETFILTER_XT_TARGET_MARK + tristate '"MARK" target support' + default m if NETFILTER_ADVANCED=n +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -46,6 +46,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMAR + obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o + obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o + obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o ++obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o + obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o + obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o + obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o +--- a/net/netfilter/nf_queue.c ++++ b/net/netfilter/nf_queue.c +@@ -20,6 +20,26 @@ static const struct nf_queue_handler *qu + + static DEFINE_MUTEX(queue_handler_mutex); + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++static const struct nf_queue_handler *queue_imq_handler; ++ ++void nf_register_queue_imq_handler(const struct nf_queue_handler *qh) ++{ ++ mutex_lock(&queue_handler_mutex); ++ rcu_assign_pointer(queue_imq_handler, qh); ++ mutex_unlock(&queue_handler_mutex); ++} ++EXPORT_SYMBOL(nf_register_queue_imq_handler); ++ ++void nf_unregister_queue_imq_handler(void) ++{ ++ mutex_lock(&queue_handler_mutex); ++ rcu_assign_pointer(queue_imq_handler, NULL); ++ mutex_unlock(&queue_handler_mutex); ++} ++EXPORT_SYMBOL(nf_unregister_queue_imq_handler); ++#endif ++ + /* return EBUSY when somebody else is registered, return EEXIST if the + * same handler is registered, return 0 in case of success. */ + int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) +@@ -80,7 +100,7 @@ void nf_unregister_queue_handlers(const + } + EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); + +-static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) ++void nf_queue_entry_release_refs(struct nf_queue_entry *entry) + { + /* Release those devices we held, or Alexey will kill me. */ + if (entry->indev) +@@ -100,6 +120,7 @@ static void nf_queue_entry_release_refs( + /* Drop reference to owner of hook which queued us. */ + module_put(entry->elem->owner); + } ++EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); + + /* + * Any packet that leaves via this function must come back +@@ -121,12 +142,26 @@ static int __nf_queue(struct sk_buff *sk + #endif + const struct nf_afinfo *afinfo; + const struct nf_queue_handler *qh; ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ const struct nf_queue_handler *qih = NULL; ++#endif + + /* QUEUE == DROP if noone is waiting, to be safe. */ + rcu_read_lock(); + + qh = rcu_dereference(queue_handler[pf]); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ if (pf == PF_INET || pf == PF_INET6) ++#else ++ if (pf == PF_INET) ++#endif ++ qih = rcu_dereference(queue_imq_handler); ++ ++ if (!qh && !qih) ++#else /* !IMQ */ + if (!qh) ++#endif + goto err_unlock; + + afinfo = nf_get_afinfo(pf); +@@ -145,6 +180,10 @@ static int __nf_queue(struct sk_buff *sk + .indev = indev, + .outdev = outdev, + .okfn = okfn, ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ .next_outfn = qh ? qh->outfn : NULL, ++ .next_queuenum = queuenum, ++#endif + }; + + /* If it's going away, ignore hook. */ +@@ -170,8 +209,19 @@ static int __nf_queue(struct sk_buff *sk + } + #endif + afinfo->saveroute(skb, entry); ++ ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ if (qih) { ++ status = qih->outfn(entry, queuenum); ++ goto imq_skip_queue; ++ } ++#endif ++ + status = qh->outfn(entry, queuenum); + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++imq_skip_queue: ++#endif + rcu_read_unlock(); + + if (status < 0) { +--- /dev/null ++++ b/net/netfilter/xt_IMQ.c +@@ -0,0 +1,73 @@ ++/* ++ * This target marks packets to be enqueued to an imq device ++ */ ++#include <linux/module.h> ++#include <linux/skbuff.h> ++#include <linux/netfilter/x_tables.h> ++#include <linux/netfilter/xt_IMQ.h> ++#include <linux/imq.h> ++ ++static unsigned int imq_target(struct sk_buff *pskb, ++ const struct xt_target_param *par) ++{ ++ const struct xt_imq_info *mr = par->targinfo; ++ ++ pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE; ++ ++ return XT_CONTINUE; ++} ++ ++static bool imq_checkentry(const struct xt_tgchk_param *par) ++{ ++ struct xt_imq_info *mr = par->targinfo; ++ ++ if (mr->todev > IMQ_MAX_DEVS - 1) { ++ printk(KERN_WARNING ++ "IMQ: invalid device specified, highest is %u\n", ++ IMQ_MAX_DEVS - 1); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static struct xt_target xt_imq_reg[] __read_mostly = { ++ { ++ .name = "IMQ", ++ .family = AF_INET, ++ .checkentry = imq_checkentry, ++ .target = imq_target, ++ .targetsize = sizeof(struct xt_imq_info), ++ .table = "mangle", ++ .me = THIS_MODULE ++ }, ++ { ++ .name = "IMQ", ++ .family = AF_INET6, ++ .checkentry = imq_checkentry, ++ .target = imq_target, ++ .targetsize = sizeof(struct xt_imq_info), ++ .table = "mangle", ++ .me = THIS_MODULE ++ }, ++}; ++ ++static int __init imq_init(void) ++{ ++ return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg)); ++} ++ ++static void __exit imq_fini(void) ++{ ++ xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg)); ++} ++ ++module_init(imq_init); ++module_exit(imq_fini); ++ ++MODULE_AUTHOR("http://www.linuximq.net"); ++MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information."); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS("ipt_IMQ"); ++MODULE_ALIAS("ip6t_IMQ"); ++ + |
#vim 100-netfilter_layer7_2.21.patch
--- /dev/null 2010-02-26 12:46:03.368409969 +0000 +++ 100-netfilter_layer7_2.21.patch 2010-02-26 15:14:35.000000000 +0000 @@ -0,0 +1,2133 @@ +--- /dev/null ++++ b/include/linux/netfilter/xt_layer7.h +@@ -0,0 +1,13 @@ ++#ifndef _XT_LAYER7_H ++#define _XT_LAYER7_H ++ ++#define MAX_PATTERN_LEN 8192 ++#define MAX_PROTOCOL_LEN 256 ++ ++struct xt_layer7_info { ++ char protocol[MAX_PROTOCOL_LEN]; ++ char pattern[MAX_PATTERN_LEN]; ++ u_int8_t invert; ++}; ++ ++#endif /* _XT_LAYER7_H */ +--- a/include/net/netfilter/nf_conntrack.h ++++ b/include/net/netfilter/nf_conntrack.h +@@ -116,6 +116,22 @@ struct nf_conn { + u_int32_t secmark; + #endif + ++#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) ++ struct { ++ /* ++ * e.g. "http". NULL before decision. "unknown" after decision ++ * if no match. ++ */ ++ char *app_proto; ++ /* ++ * application layer data so far. NULL after match decision. ++ */ ++ char *app_data; ++ unsigned int app_data_len; ++ } layer7; ++#endif ++ + /* Storage reserved for other modules: */ + union nf_conntrack_proto proto; + +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -858,6 +858,27 @@ config NETFILTER_XT_MATCH_STATE + + To compile it as a module, choose M here. If unsure, say N. + ++config NETFILTER_XT_MATCH_LAYER7 ++ tristate '"layer7" match support' ++ depends on NETFILTER_XTABLES ++ depends on EXPERIMENTAL && (IP_NF_CONNTRACK || NF_CONNTRACK) ++ depends on NF_CT_ACCT ++ help ++ Say Y if you want to be able to classify connections (and their ++ packets) based on regular expression matching of their application ++ layer data. This is one way to classify applications such as ++ peer-to-peer filesharing systems that do not always use the same ++ port. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config NETFILTER_XT_MATCH_LAYER7_DEBUG ++ bool 'Layer 7 debugging output' ++ depends on NETFILTER_XT_MATCH_LAYER7 ++ help ++ Say Y to get lots of debugging output. ++ ++ + config NETFILTER_XT_MATCH_STATISTIC + tristate '"statistic" match support' + depends on NETFILTER_ADVANCED +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -89,6 +89,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) + obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o + obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o + obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o ++obj-$(CONFIG_NETFILTER_XT_MATCH_LAYER7) += xt_layer7.o + obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o + obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o + obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -201,6 +201,14 @@ destroy_conntrack(struct nf_conntrack *n + * too. */ + nf_ct_remove_expectations(ct); + ++ #if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) ++ if(ct->layer7.app_proto) ++ kfree(ct->layer7.app_proto); ++ if(ct->layer7.app_data) ++ kfree(ct->layer7.app_data); ++ #endif ++ ++ + /* We overload first tuple to link into unconfirmed list. */ + if (!nf_ct_is_confirmed(ct)) { + BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); +--- a/net/netfilter/nf_conntrack_standalone.c ++++ b/net/netfilter/nf_conntrack_standalone.c +@@ -171,6 +171,12 @@ static int ct_seq_show(struct seq_file * + goto release; + #endif + ++#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) ++ if(ct->layer7.app_proto && ++ seq_printf(s, "l7proto=%s ", ct->layer7.app_proto)) ++ return -ENOSPC; ++#endif ++ + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) + goto release; + +--- /dev/null ++++ b/net/netfilter/regexp/regexp.c +@@ -0,0 +1,1197 @@ ++/* ++ * regcomp and regexec -- regsub and regerror are elsewhere ++ * @(#)regexp.c 1.3 of 18 April 87 ++ * ++ * Copyright (c) 1986 by University of Toronto. ++ * Written by Henry Spencer. Not derived from licensed software. ++ * ++ * Permission is granted to anyone to use this software for any ++ * purpose on any computer system, and to redistribute it freely, ++ * subject to the following restrictions: ++ * ++ * 1. The author is not responsible for the consequences of use of ++ * this software, no matter how awful, even if they arise ++ * from defects in it. ++ * ++ * 2. The origin of this software must not be misrepresented, either ++ * by explicit claim or by omission. ++ * ++ * 3. Altered versions must be plainly marked as such, and must not ++ * be misrepresented as being the original software. ++ * ++ * Beware that some of this code is subtly aware of the way operator ++ * precedence is structured in regular expressions. Serious changes in ++ * regular-expression syntax might require a total rethink. ++ * ++ * This code was modified by Ethan Sommer to work within the kernel ++ * (it now uses kmalloc etc..) ++ * ++ * Modified slightly by Matthew Strait to use more modern C. ++ */ ++ ++#include "regexp.h" ++#include "regmagic.h" ++ ++/* added by ethan and matt. Lets it work in both kernel and user space. ++(So iptables can use it, for instance.) Yea, it goes both ways... */ ++#if __KERNEL__ ++ #define malloc(foo) kmalloc(foo,GFP_ATOMIC) ++#else ++ #define printk(format,args...) printf(format,##args) ++#endif ++ ++void regerror(char * s) ++{ ++ printk("<3>Regexp: %s\n", s); ++ /* NOTREACHED */ ++} ++ ++/* ++ * The "internal use only" fields in regexp.h are present to pass info from ++ * compile to execute that permits the execute phase to run lots faster on ++ * simple cases. They are: ++ * ++ * regstart char that must begin a match; '\0' if none obvious ++ * reganch is the match anchored (at beginning-of-line only)? ++ * regmust string (pointer into program) that match must include, or NULL ++ * regmlen length of regmust string ++ * ++ * Regstart and reganch permit very fast decisions on suitable starting points ++ * for a match, cutting down the work a lot. Regmust permits fast rejection ++ * of lines that cannot possibly match. The regmust tests are costly enough ++ * that regcomp() supplies a regmust only if the r.e. contains something ++ * potentially expensive (at present, the only such thing detected is * or + ++ * at the start of the r.e., which can involve a lot of backup). Regmlen is ++ * supplied because the test in regexec() needs it and regcomp() is computing ++ * it anyway. ++ */ ++ ++/* ++ * Structure for regexp "program". This is essentially a linear encoding ++ * of a nondeterministic finite-state machine (aka syntax charts or ++ * "railroad normal form" in parsing technology). Each node is an opcode ++ * plus a "next" pointer, possibly plus an operand. "Next" pointers of ++ * all nodes except BRANCH implement concatenation; a "next" pointer with ++ * a BRANCH on both ends of it is connecting two alternatives. (Here we ++ * have one of the subtle syntax dependencies: an individual BRANCH (as ++ * opposed to a collection of them) is never concatenated with anything ++ * because of operator precedence.) The operand of some types of node is ++ * a literal string; for others, it is a node leading into a sub-FSM. In ++ * particular, the operand of a BRANCH node is the first node of the branch. ++ * (NB this is *not* a tree structure: the tail of the branch connects ++ * to the thing following the set of BRANCHes.) The opcodes are: ++ */ ++ ++/* definition number opnd? meaning */ ++#define END 0 /* no End of program. */ ++#define BOL 1 /* no Match "" at beginning of line. */ ++#define EOL 2 /* no Match "" at end of line. */ ++#define ANY 3 /* no Match any one character. */ ++#define ANYOF 4 /* str Match any character in this string. */ ++#define ANYBUT 5 /* str Match any character not in this string. */ ++#define BRANCH 6 /* node Match this alternative, or the next... */ ++#define BACK 7 /* no Match "", "next" ptr points backward. */ ++#define EXACTLY 8 /* str Match this string. */ ++#define NOTHING 9 /* no Match empty string. */ ++#define STAR 10 /* node Match this (simple) thing 0 or more times. */ ++#define PLUS 11 /* node Match this (simple) thing 1 or more times. */ ++#define OPEN 20 /* no Mark this point in input as start of #n. */ ++ /* OPEN+1 is number 1, etc. */ ++#define CLOSE 30 /* no Analogous to OPEN. */ ++ ++/* ++ * Opcode notes: ++ * ++ * BRANCH The set of branches constituting a single choice are hooked ++ * together with their "next" pointers, since precedence prevents ++ * anything being concatenated to any individual branch. The ++ * "next" pointer of the last BRANCH in a choice points to the ++ * thing following the whole choice. This is also where the ++ * final "next" pointer of each individual branch points; each ++ * branch starts with the operand node of a BRANCH node. ++ * ++ * BACK Normal "next" pointers all implicitly point forward; BACK ++ * exists to make loop structures possible. ++ * ++ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular ++ * BRANCH structures using BACK. Simple cases (one character ++ * per match) are implemented with STAR and PLUS for speed ++ * and to minimize recursive plunges. ++ * ++ * OPEN,CLOSE ...are numbered at compile time. ++ */ ++ ++/* ++ * A node is one char of opcode followed by two chars of "next" pointer. ++ * "Next" pointers are stored as two 8-bit pieces, high order first. The ++ * value is a positive offset from the opcode of the node containing it. ++ * An operand, if any, simply follows the node. (Note that much of the ++ * code generation knows about this implicit relationship.) ++ * ++ * Using two bytes for the "next" pointer is vast overkill for most things, ++ * but allows patterns to get big without disasters. ++ */ ++#define OP(p) (*(p)) ++#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) ++#define OPERAND(p) ((p) + 3) ++ ++/* ++ * See regmagic.h for one further detail of program structure. ++ */ ++ ++ ++/* ++ * Utility definitions. ++ */ ++#ifndef CHARBITS ++#define UCHARAT(p) ((int)*(unsigned char *)(p)) ++#else ++#define UCHARAT(p) ((int)*(p)&CHARBITS) ++#endif ++ ++#define FAIL(m) { regerror(m); return(NULL); } ++#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') ++#define META "^$.[()|?+*\\" ++ ++/* ++ * Flags to be passed up and down. ++ */ ++#define HASWIDTH 01 /* Known never to match null string. */ ++#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ ++#define SPSTART 04 /* Starts with * or +. */ ++#define WORST 0 /* Worst case. */ ++ ++/* ++ * Global work variables for regcomp(). ++ */ ++struct match_globals { ++char *reginput; /* String-input pointer. */ ++char *regbol; /* Beginning of input, for ^ check. */ ++char **regstartp; /* Pointer to startp array. */ ++char **regendp; /* Ditto for endp. */ ++char *regparse; /* Input-scan pointer. */ ++int regnpar; /* () count. */ ++char regdummy; ++char *regcode; /* Code-emit pointer; ®dummy = don't. */ ++long regsize; /* Code size. */ ++}; ++ ++/* ++ * Forward declarations for regcomp()'s friends. ++ */ ++#ifndef STATIC ++#define STATIC static ++#endif ++STATIC char *reg(struct match_globals *g, int paren,int *flagp); ++STATIC char *regbranch(struct match_globals *g, int *flagp); ++STATIC char *regpiece(struct match_globals *g, int *flagp); ++STATIC char *regatom(struct match_globals *g, int *flagp); ++STATIC char *regnode(struct match_globals *g, char op); ++STATIC char *regnext(struct match_globals *g, char *p); ++STATIC void regc(struct match_globals *g, char b); ++STATIC void reginsert(struct match_globals *g, char op, char *opnd); ++STATIC void regtail(struct match_globals *g, char *p, char *val); ++STATIC void regoptail(struct match_globals *g, char *p, char *val); ++ ++ ++__kernel_size_t my_strcspn(const char *s1,const char *s2) ++{ ++ char *scan1; ++ char *scan2; ++ int count; ++ ++ count = 0; ++ for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) { ++ for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */ ++ if (*scan1 == *scan2++) ++ return(count); ++ count++; ++ } ++ return(count); ++} ++ ++/* ++ - regcomp - compile a regular expression into internal code ++ * ++ * We can't allocate space until we know how big the compiled form will be, ++ * but we can't compile it (and thus know how big it is) until we've got a ++ * place to put the code. So we cheat: we compile it twice, once with code ++ * generation turned off and size counting turned on, and once "for real". ++ * This also means that we don't allocate space until we are sure that the ++ * thing really will compile successfully, and we never have to move the ++ * code and thus invalidate pointers into it. (Note that it has to be in ++ * one piece because free() must be able to free it all.) ++ * ++ * Beware that the optimization-preparation code in here knows about some ++ * of the structure of the compiled regexp. ++ */ ++regexp * ++regcomp(char *exp,int *patternsize) ++{ ++ register regexp *r; ++ register char *scan; ++ register char *longest; ++ register int len; ++ int flags; ++ struct match_globals g; ++ ++ /* commented out by ethan ++ extern char *malloc(); ++ */ ++ ++ if (exp == NULL) ++ FAIL("NULL argument"); ++ ++ /* First pass: determine size, legality. */ ++ g.regparse = exp; ++ g.regnpar = 1; ++ g.regsize = 0L; ++ g.regcode = &g.regdummy; ++ regc(&g, MAGIC); ++ if (reg(&g, 0, &flags) == NULL) ++ return(NULL); ++ ++ /* Small enough for pointer-storage convention? */ ++ if (g.regsize >= 32767L) /* Probably could be 65535L. */ ++ FAIL("regexp too big"); ++ ++ /* Allocate space. */ ++ *patternsize=sizeof(regexp) + (unsigned)g.regsize; ++ r = (regexp *)malloc(sizeof(regexp) + (unsigned)g.regsize); ++ if (r == NULL) ++ FAIL("out of space"); ++ ++ /* Second pass: emit code. */ ++ g.regparse = exp; ++ g.regnpar = 1; ++ g.regcode = r->program; ++ regc(&g, MAGIC); ++ if (reg(&g, 0, &flags) == NULL) ++ return(NULL); ++ ++ /* Dig out information for optimizations. */ ++ r->regstart = '\0'; /* Worst-case defaults. */ ++ r->reganch = 0; ++ r->regmust = NULL; ++ r->regmlen = 0; ++ scan = r->program+1; /* First BRANCH. */ ++ if (OP(regnext(&g, scan)) == END) { /* Only one top-level choice. */ ++ scan = OPERAND(scan); ++ ++ /* Starting-point info. */ ++ if (OP(scan) == EXACTLY) ++ r->regstart = *OPERAND(scan); ++ else if (OP(scan) == BOL) ++ r->reganch++; ++ ++ /* ++ * If there's something expensive in the r.e., find the ++ * longest literal string that must appear and make it the ++ * regmust. Resolve ties in favor of later strings, since ++ * the regstart check works with the beginning of the r.e. ++ * and avoiding duplication strengthens checking. Not a ++ * strong reason, but sufficient in the absence of others. ++ */ ++ if (flags&SPSTART) { ++ longest = NULL; ++ len = 0; ++ for (; scan != NULL; scan = regnext(&g, scan)) ++ if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { ++ longest = OPERAND(scan); ++ len = strlen(OPERAND(scan)); ++ } ++ r->regmust = longest; ++ r->regmlen = len; ++ } ++ } ++ ++ return(r); ++} ++ ++/* ++ - reg - regular expression, i.e. main body or parenthesized thing ++ * ++ * Caller must absorb opening parenthesis. ++ * ++ * Combining parenthesis handling with the base level of regular expression ++ * is a trifle forced, but the need to tie the tails of the branches to what ++ * follows makes it hard to avoid. ++ */ ++static char * ++reg(struct match_globals *g, int paren, int *flagp /* Parenthesized? */ ) ++{ ++ register char *ret; ++ register char *br; ++ register char *ender; ++ register int parno = 0; /* 0 makes gcc happy */ ++ int flags; ++ ++ *flagp = HASWIDTH; /* Tentatively. */ ++ ++ /* Make an OPEN node, if parenthesized. */ ++ if (paren) { ++ if (g->regnpar >= NSUBEXP) ++ FAIL("too many ()"); ++ parno = g->regnpar; ++ g->regnpar++; ++ ret = regnode(g, OPEN+parno); ++ } else ++ ret = NULL; ++ ++ /* Pick up the branches, linking them together. */ ++ br = regbranch(g, &flags); ++ if (br == NULL) ++ return(NULL); ++ if (ret != NULL) ++ regtail(g, ret, br); /* OPEN -> first. */ ++ else ++ ret = br; ++ if (!(flags&HASWIDTH)) ++ *flagp &= ~HASWIDTH; ++ *flagp |= flags&SPSTART; ++ while (*g->regparse == '|') { ++ g->regparse++; ++ br = regbranch(g, &flags); ++ if (br == NULL) ++ return(NULL); ++ regtail(g, ret, br); /* BRANCH -> BRANCH. */ ++ if (!(flags&HASWIDTH)) ++ *flagp &= ~HASWIDTH; ++ *flagp |= flags&SPSTART; ++ } ++ ++ /* Make a closing node, and hook it on the end. */ ++ ender = regnode(g, (paren) ? CLOSE+parno : END); ++ regtail(g, ret, ender); ++ ++ /* Hook the tails of the branches to the closing node. */ ++ for (br = ret; br != NULL; br = regnext(g, br)) ++ regoptail(g, br, ender); ++ ++ /* Check for proper termination. */ ++ if (paren && *g->regparse++ != ')') { ++ FAIL("unmatched ()"); ++ } else if (!paren && *g->regparse != '\0') { ++ if (*g->regparse == ')') { ++ FAIL("unmatched ()"); ++ } else ++ FAIL("junk on end"); /* "Can't happen". */ ++ /* NOTREACHED */ ++ } ++ ++ return(ret); ++} ++ ++/* ++ - regbranch - one alternative of an | operator ++ * ++ * Implements the concatenation operator. ++ */ ++static char * ++regbranch(struct match_globals *g, int *flagp) ++{ ++ register char *ret; ++ register char *chain; ++ register char *latest; ++ int flags; ++ ++ *flagp = WORST; /* Tentatively. */ ++ ++ ret = regnode(g, BRANCH); ++ chain = NULL; ++ while (*g->regparse != '\0' && *g->regparse != '|' && *g->regparse != ')') { ++ latest = regpiece(g, &flags); ++ if (latest == NULL) ++ return(NULL); ++ *flagp |= flags&HASWIDTH; ++ if (chain == NULL) /* First piece. */ ++ *flagp |= flags&SPSTART; ++ else ++ regtail(g, chain, latest); ++ chain = latest; ++ } ++ if (chain == NULL) /* Loop ran zero times. */ ++ (void) regnode(g, NOTHING); ++ ++ return(ret); ++} ++ ++/* ++ - regpiece - something followed by possible [*+?] ++ * ++ * Note that the branching code sequences used for ? and the general cases ++ * of * and + are somewhat optimized: they use the same NOTHING node as ++ * both the endmarker for their branch list and the body of the last branch. ++ * It might seem that this node could be dispensed with entirely, but the ++ * endmarker role is not redundant. ++ */ ++static char * ++regpiece(struct match_globals *g, int *flagp) ++{ ++ register char *ret; ++ register char op; ++ register char *next; ++ int flags; ++ ++ ret = regatom(g, &flags); ++ if (ret == NULL) ++ return(NULL); ++ ++ op = *g->regparse; ++ if (!ISMULT(op)) { ++ *flagp = flags; ++ return(ret); ++ } ++ ++ if (!(flags&HASWIDTH) && op != '?') ++ FAIL("*+ operand could be empty"); ++ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); ++ ++ if (op == '*' && (flags&SIMPLE)) ++ reginsert(g, STAR, ret); ++ else if (op == '*') { ++ /* Emit x* as (x&|), where & means "self". */ ++ reginsert(g, BRANCH, ret); /* Either x */ ++ regoptail(g, ret, regnode(g, BACK)); /* and loop */ ++ regoptail(g, ret, ret); /* back */ ++ regtail(g, ret, regnode(g, BRANCH)); /* or */ ++ regtail(g, ret, regnode(g, NOTHING)); /* null. */ ++ } else if (op == '+' && (flags&SIMPLE)) ++ reginsert(g, PLUS, ret); ++ else if (op == '+') { ++ /* Emit x+ as x(&|), where & means "self". */ ++ next = regnode(g, BRANCH); /* Either */ ++ regtail(g, ret, next); ++ regtail(g, regnode(g, BACK), ret); /* loop back */ ++ regtail(g, next, regnode(g, BRANCH)); /* or */ ++ regtail(g, ret, regnode(g, NOTHING)); /* null. */ ++ } else if (op == '?') { ++ /* Emit x? as (x|) */ ++ reginsert(g, BRANCH, ret); /* Either x */ ++ regtail(g, ret, regnode(g, BRANCH)); /* or */ ++ next = regnode(g, NOTHING); /* null. */ ++ regtail(g, ret, next); ++ regoptail(g, ret, next); ++ } ++ g->regparse++; ++ if (ISMULT(*g->regparse)) ++ FAIL("nested *?+"); ++ ++ return(ret); ++} ++ ++/* ++ - regatom - the lowest level ++ * ++ * Optimization: gobbles an entire sequence of ordinary characters so that ++ * it can turn them into a single node, which is smaller to store and ++ * faster to run. Backslashed characters are exceptions, each becoming a ++ * separate node; the code is simpler that way and it's not worth fixing. ++ */ ++static char * ++regatom(struct match_globals *g, int *flagp) ++{ ++ register char *ret; ++ int flags; ++ ++ *flagp = WORST; /* Tentatively. */ ++ ++ switch (*g->regparse++) { ++ case '^': ++ ret = regnode(g, BOL); ++ break; ++ case '$': ++ ret = regnode(g, EOL); ++ break; ++ case '.': ++ ret = regnode(g, ANY); ++ *flagp |= HASWIDTH|SIMPLE; ++ break; ++ case '[': { ++ register int class; ++ register int classend; ++ ++ if (*g->regparse == '^') { /* Complement of range. */ ++ ret = regnode(g, ANYBUT); ++ g->regparse++; ++ } else ++ ret = regnode(g, ANYOF); ++ if (*g->regparse == ']' || *g->regparse == '-') ++ regc(g, *g->regparse++); ++ while (*g->regparse != '\0' && *g->regparse != ']') { ++ if (*g->regparse == '-') { ++ g->regparse++; ++ if (*g->regparse == ']' || *g->regparse == '\0') ++ regc(g, '-'); ++ else { ++ class = UCHARAT(g->regparse-2)+1; ++ classend = UCHARAT(g->regparse); ++ if (class > classend+1) ++ FAIL("invalid [] range"); ++ for (; class <= classend; class++) ++ regc(g, class); ++ g->regparse++; ++ } ++ } else ++ regc(g, *g->regparse++); ++ } ++ regc(g, '\0'); ++ if (*g->regparse != ']') ++ FAIL("unmatched []"); ++ g->regparse++; ++ *flagp |= HASWIDTH|SIMPLE; ++ } ++ break; ++ case '(': ++ ret = reg(g, 1, &flags); ++ if (ret == NULL) ++ return(NULL); ++ *flagp |= flags&(HASWIDTH|SPSTART); ++ break; ++ case '\0': ++ case '|': ++ case ')': ++ FAIL("internal urp"); /* Supposed to be caught earlier. */ ++ break; ++ case '?': ++ case '+': ++ case '*': ++ FAIL("?+* follows nothing"); ++ break; ++ case '\\': ++ if (*g->regparse == '\0') ++ FAIL("trailing \\"); ++ ret = regnode(g, EXACTLY); ++ regc(g, *g->regparse++); ++ regc(g, '\0'); ++ *flagp |= HASWIDTH|SIMPLE; ++ break; ++ default: { ++ register int len; ++ register char ender; ++ ++ g->regparse--; ++ len = my_strcspn((const char *)g->regparse, (const char *)META); ++ if (len <= 0) ++ FAIL("internal disaster"); ++ ender = *(g->regparse+len); ++ if (len > 1 && ISMULT(ender)) ++ len--; /* Back off clear of ?+* operand. */ ++ *flagp |= HASWIDTH; ++ if (len == 1) ++ *flagp |= SIMPLE; ++ ret = regnode(g, EXACTLY); ++ while (len > 0) { ++ regc(g, *g->regparse++); ++ len--; ++ } ++ regc(g, '\0'); ++ } ++ break; ++ } ++ ++ return(ret); ++} ++ ++/* ++ - regnode - emit a node ++ */ ++static char * /* Location. */ ++regnode(struct match_globals *g, char op) ++{ ++ register char *ret; ++ register char *ptr; ++ ++ ret = g->regcode; ++ if (ret == &g->regdummy) { ++ g->regsize += 3; ++ return(ret); ++ } ++ ++ ptr = ret; ++ *ptr++ = op; ++ *ptr++ = '\0'; /* Null "next" pointer. */ ++ *ptr++ = '\0'; ++ g->regcode = ptr; ++ ++ return(ret); ++} ++ ++/* ++ - regc - emit (if appropriate) a byte of code ++ */ ++static void ++regc(struct match_globals *g, char b) ++{ ++ if (g->regcode != &g->regdummy) ++ *g->regcode++ = b; ++ else ++ g->regsize++; ++} ++ ++/* ++ - reginsert - insert an operator in front of already-emitted operand ++ * ++ * Means relocating the operand. ++ */ ++static void ++reginsert(struct match_globals *g, char op, char* opnd) ++{ ++ register char *src; ++ register char *dst; ++ register char *place; ++ ++ if (g->regcode == &g->regdummy) { ++ g->regsize += 3; ++ return; ++ } ++ ++ src = g->regcode; ++ g->regcode += 3; ++ dst = g->regcode; ++ while (src > opnd) ++ *--dst = *--src; ++ ++ place = opnd; /* Op node, where operand used to be. */ ++ *place++ = op; ++ *place++ = '\0'; ++ *place++ = '\0'; ++} ++ ++/* ++ - regtail - set the next-pointer at the end of a node chain ++ */ ++static void ++regtail(struct match_globals *g, char *p, char *val) ++{ ++ register char *scan; ++ register char *temp; ++ register int offset; ++ ++ if (p == &g->regdummy) ++ return; ++ ++ /* Find last node. */ ++ scan = p; ++ for (;;) { ++ temp = regnext(g, scan); ++ if (temp == NULL) ++ break; ++ scan = temp; ++ } ++ ++ if (OP(scan) == BACK) ++ offset = scan - val; ++ else ++ offset = val - scan; ++ *(scan+1) = (offset>>8)&0377; ++ *(scan+2) = offset&0377; ++} ++ ++/* ++ - regoptail - regtail on operand of first argument; nop if operandless ++ */ ++static void ++regoptail(struct match_globals *g, char *p, char *val) ++{ ++ /* "Operandless" and "op != BRANCH" are synonymous in practice. */ ++ if (p == NULL || p == &g->regdummy || OP(p) != BRANCH) ++ return; ++ regtail(g, OPERAND(p), val); ++} ++ ++/* ++ * regexec and friends ++ */ ++ ++ ++/* ++ * Forwards. ++ */ ++STATIC int regtry(struct match_globals *g, regexp *prog, char *string); ++STATIC int regmatch(struct match_globals *g, char *prog); ++STATIC int regrepeat(struct match_globals *g, char *p); ++ ++#ifdef DEBUG ++int regnarrate = 0; ++void regdump(); ++STATIC char *regprop(char *op); ++#endif ++ ++/* ++ - regexec - match a regexp against a string ++ */ ++int ++regexec(regexp *prog, char *string) ++{ ++ register char *s; ++ struct match_globals g; ++ ++ /* Be paranoid... */ ++ if (prog == NULL || string == NULL) { ++ printk("<3>Regexp: NULL parameter\n"); ++ return(0); ++ } ++ ++ /* Check validity of program. */ ++ if (UCHARAT(prog->program) != MAGIC) { ++ printk("<3>Regexp: corrupted program\n"); ++ return(0); ++ } ++ ++ /* If there is a "must appear" string, look for it. */ ++ if (prog->regmust != NULL) { ++ s = string; ++ while ((s = strchr(s, prog->regmust[0])) != NULL) { ++ if (strncmp(s, prog->regmust, prog->regmlen) == 0) ++ break; /* Found it. */ ++ s++; ++ } ++ if (s == NULL) /* Not present. */ ++ return(0); ++ } ++ ++ /* Mark beginning of line for ^ . */ ++ g.regbol = string; ++ ++ /* Simplest case: anchored match need be tried only once. */ ++ if (prog->reganch) ++ return(regtry(&g, prog, string)); ++ ++ /* Messy cases: unanchored match. */ ++ s = string; ++ if (prog->regstart != '\0') ++ /* We know what char it must start with. */ ++ while ((s = strchr(s, prog->regstart)) != NULL) { ++ if (regtry(&g, prog, s)) ++ return(1); ++ s++; ++ } ++ else ++ /* We don't -- general case. */ ++ do { ++ if (regtry(&g, prog, s)) ++ return(1); ++ } while (*s++ != '\0'); ++ ++ /* Failure. */ ++ return(0); ++} ++ ++/* ++ - regtry - try match at specific point ++ */ ++static int /* 0 failure, 1 success */ ++regtry(struct match_globals *g, regexp *prog, char *string) ++{ ++ register int i; ++ register char **sp; ++ register char **ep; ++ ++ g->reginput = string; ++ g->regstartp = prog->startp; ++ g->regendp = prog->endp; ++ ++ sp = prog->startp; ++ ep = prog->endp; ++ for (i = NSUBEXP; i > 0; i--) { ++ *sp++ = NULL; ++ *ep++ = NULL; ++ } ++ if (regmatch(g, prog->program + 1)) { ++ prog->startp[0] = string; ++ prog->endp[0] = g->reginput; ++ return(1); ++ } else ++ return(0); ++} ++ ++/* ++ - regmatch - main matching routine ++ * ++ * Conceptually the strategy is simple: check to see whether the current ++ * node matches, call self recursively to see whether the rest matches, ++ * and then act accordingly. In practice we make some effort to avoid ++ * recursion, in particular by going through "ordinary" nodes (that don't ++ * need to know whether the rest of the match failed) by a loop instead of ++ * by recursion. ++ */ ++static int /* 0 failure, 1 success */ ++regmatch(struct match_globals *g, char *prog) ++{ ++ register char *scan = prog; /* Current node. */ ++ char *next; /* Next node. */ ++ ++#ifdef DEBUG ++ if (scan != NULL && regnarrate) ++ fprintf(stderr, "%s(\n", regprop(scan)); ++#endif ++ while (scan != NULL) { ++#ifdef DEBUG ++ if (regnarrate) ++ fprintf(stderr, "%s...\n", regprop(scan)); ++#endif ++ next = regnext(g, scan); ++ ++ switch (OP(scan)) { ++ case BOL: ++ if (g->reginput != g->regbol) ++ return(0); ++ break; ++ case EOL: ++ if (*g->reginput != '\0') ++ return(0); ++ break; ++ case ANY: ++ if (*g->reginput == '\0') ++ return(0); ++ g->reginput++; ++ break; ++ case EXACTLY: { ++ register int len; ++ register char *opnd; ++ ++ opnd = OPERAND(scan); ++ /* Inline the first character, for speed. */ ++ if (*opnd != *g->reginput) ++ return(0); ++ len = strlen(opnd); ++ if (len > 1 && strncmp(opnd, g->reginput, len) != 0) ++ return(0); ++ g->reginput += len; ++ } ++ break; ++ case ANYOF: ++ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) == NULL) ++ return(0); ++ g->reginput++; ++ break; ++ case ANYBUT: ++ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) != NULL) ++ return(0); ++ g->reginput++; ++ break; ++ case NOTHING: ++ case BACK: ++ break; ++ case OPEN+1: ++ case OPEN+2: ++ case OPEN+3: ++ case OPEN+4: ++ case OPEN+5: ++ case OPEN+6: ++ case OPEN+7: ++ case OPEN+8: ++ case OPEN+9: { ++ register int no; ++ register char *save; ++ ++ no = OP(scan) - OPEN; ++ save = g->reginput; ++ ++ if (regmatch(g, next)) { ++ /* ++ * Don't set startp if some later ++ * invocation of the same parentheses ++ * already has. ++ */ ++ if (g->regstartp[no] == NULL) ++ g->regstartp[no] = save; ++ return(1); ++ } else ++ return(0); ++ } ++ break; ++ case CLOSE+1: ++ case CLOSE+2: ++ case CLOSE+3: ++ case CLOSE+4: ++ case CLOSE+5: ++ case CLOSE+6: ++ case CLOSE+7: ++ case CLOSE+8: ++ case CLOSE+9: ++ { ++ register int no; ++ register char *save; ++ ++ no = OP(scan) - CLOSE; ++ save = g->reginput; ++ ++ if (regmatch(g, next)) { ++ /* ++ * Don't set endp if some later ++ * invocation of the same parentheses ++ * already has. ++ */ ++ if (g->regendp[no] == NULL) ++ g->regendp[no] = save; ++ return(1); ++ } else ++ return(0); ++ } ++ break; ++ case BRANCH: { ++ register char *save; ++ ++ if (OP(next) != BRANCH) /* No choice. */ ++ next = OPERAND(scan); /* Avoid recursion. */ ++ else { ++ do { ++ save = g->reginput; ++ if (regmatch(g, OPERAND(scan))) ++ return(1); ++ g->reginput = save; ++ scan = regnext(g, scan); ++ } while (scan != NULL && OP(scan) == BRANCH); ++ return(0); ++ /* NOTREACHED */ ++ } ++ } ++ break; ++ case STAR: ++ case PLUS: { ++ register char nextch; ++ register int no; ++ register char *save; ++ register int min; ++ ++ /* ++ * Lookahead to avoid useless match attempts ++ * when we know what character comes next. ++ */ ++ nextch = '\0'; ++ if (OP(next) == EXACTLY) ++ nextch = *OPERAND(next); ++ min = (OP(scan) == STAR) ? 0 : 1; ++ save = g->reginput; ++ no = regrepeat(g, OPERAND(scan)); ++ while (no >= min) { ++ /* If it could work, try it. */ ++ if (nextch == '\0' || *g->reginput == nextch) ++ if (regmatch(g, next)) ++ return(1); ++ /* Couldn't or didn't -- back up. */ ++ no--; ++ g->reginput = save + no; ++ } ++ return(0); ++ } ++ break; ++ case END: ++ return(1); /* Success! */ ++ break; ++ default: ++ printk("<3>Regexp: memory corruption\n"); ++ return(0); ++ break; ++ } ++ ++ scan = next; ++ } ++ ++ /* ++ * We get here only if there's trouble -- normally "case END" is ++ * the terminating point. ++ */ ++ printk("<3>Regexp: corrupted pointers\n"); ++ return(0); ++} ++ ++/* ++ - regrepeat - repeatedly match something simple, report how many ++ */ ++static int ++regrepeat(struct match_globals *g, char *p) ++{ ++ register int count = 0; ++ register char *scan; ++ register char *opnd; ++ ++ scan = g->reginput; ++ opnd = OPERAND(p); ++ switch (OP(p)) { ++ case ANY: ++ count = strlen(scan); ++ scan += count; ++ break; ++ case EXACTLY: ++ while (*opnd == *scan) { ++ count++; ++ scan++; ++ } ++ break; ++ case ANYOF: ++ while (*scan != '\0' && strchr(opnd, *scan) != NULL) { ++ count++; ++ scan++; ++ } ++ break; ++ case ANYBUT: ++ while (*scan != '\0' && strchr(opnd, *scan) == NULL) { ++ count++; ++ scan++; ++ } ++ break; ++ default: /* Oh dear. Called inappropriately. */ ++ printk("<3>Regexp: internal foulup\n"); ++ count = 0; /* Best compromise. */ ++ break; ++ } ++ g->reginput = scan; ++ ++ return(count); ++} ++ ++/* ++ - regnext - dig the "next" pointer out of a node ++ */ ++static char* ++regnext(struct match_globals *g, char *p) ++{ ++ register int offset; ++ ++ if (p == &g->regdummy) ++ return(NULL); ++ ++ offset = NEXT(p); ++ if (offset == 0) ++ return(NULL); ++ ++ if (OP(p) == BACK) ++ return(p-offset); ++ else ++ return(p+offset); ++} ++ ++#ifdef DEBUG ++ ++STATIC char *regprop(); ++ ++/* ++ - regdump - dump a regexp onto stdout in vaguely comprehensible form ++ */ ++void ++regdump(regexp *r) ++{ ++ register char *s; ++ register char op = EXACTLY; /* Arbitrary non-END op. */ ++ register char *next; ++ /* extern char *strchr(); */ ++ ++ ++ s = r->program + 1; ++ while (op != END) { /* While that wasn't END last time... */ ++ op = OP(s); ++ printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ ++ next = regnext(s); ++ if (next == NULL) /* Next ptr. */ ++ printf("(0)"); ++ else ++ printf("(%d)", (s-r->program)+(next-s)); ++ s += 3; ++ if (op == ANYOF || op == ANYBUT || op == EXACTLY) { ++ /* Literal string, where present. */ ++ while (*s != '\0') { ++ putchar(*s); ++ s++; ++ } ++ s++; ++ } ++ putchar('\n'); ++ } ++ ++ /* Header fields of interest. */ ++ if (r->regstart != '\0') ++ printf("start `%c' ", r->regstart); ++ if (r->reganch) ++ printf("anchored "); ++ if (r->regmust != NULL) ++ printf("must have \"%s\"", r->regmust); ++ printf("\n"); ++} ++ ++/* ++ - regprop - printable representation of opcode ++ */ ++static char * ++regprop(char *op) ++{ ++#define BUFLEN 50 ++ register char *p; ++ static char buf[BUFLEN]; ++ ++ strcpy(buf, ":"); ++ ++ switch (OP(op)) { ++ case BOL: ++ p = "BOL"; ++ break; ++ case EOL: ++ p = "EOL"; ++ break; ++ case ANY: ++ p = "ANY"; ++ break; ++ case ANYOF: ++ p = "ANYOF"; ++ break; ++ case ANYBUT: ++ p = "ANYBUT"; ++ break; ++ case BRANCH: ++ p = "BRANCH"; ++ break; ++ case EXACTLY: ++ p = "EXACTLY"; ++ break; ++ case NOTHING: ++ p = "NOTHING"; ++ break; ++ case BACK: ++ p = "BACK"; ++ break; ++ case END: ++ p = "END"; ++ break; ++ case OPEN+1: ++ case OPEN+2: ++ case OPEN+3: ++ case OPEN+4: ++ case OPEN+5: ++ case OPEN+6: ++ case OPEN+7: ++ case OPEN+8: ++ case OPEN+9: ++ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN); ++ p = NULL; ++ break; ++ case CLOSE+1: ++ case CLOSE+2: ++ case CLOSE+3: ++ case CLOSE+4: ++ case CLOSE+5: ++ case CLOSE+6: ++ case CLOSE+7: ++ case CLOSE+8: ++ case CLOSE+9: ++ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE); ++ p = NULL; ++ break; ++ case STAR: ++ p = "STAR"; ++ break; ++ case PLUS: ++ p = "PLUS"; ++ break; ++ default: ++ printk("<3>Regexp: corrupted opcode\n"); ++ break; ++ } ++ if (p != NULL) ++ strncat(buf, p, BUFLEN-strlen(buf)); ++ return(buf); ++} ++#endif ++ ++ +--- /dev/null ++++ b/net/netfilter/regexp/regexp.h +@@ -0,0 +1,41 @@ ++/* ++ * Definitions etc. for regexp(3) routines. ++ * ++ * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], ++ * not the System V one. ++ */ ++ ++#ifndef REGEXP_H ++#define REGEXP_H ++ ++ ++/* ++http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h , ++which contains a version of this library, says: ++ ++ * ++ * NSUBEXP must be at least 10, and no greater than 117 or the parser ++ * will not work properly. ++ * ++ ++However, it looks rather like this library is limited to 10. If you think ++otherwise, let us know. ++*/ ++ ++#define NSUBEXP 10 ++typedef struct regexp { ++ char *startp[NSUBEXP]; ++ char *endp[NSUBEXP]; ++ char regstart; /* Internal use only. */ ++ char reganch; /* Internal use only. */ ++ char *regmust; /* Internal use only. */ ++ int regmlen; /* Internal use only. */ ++ char program[1]; /* Unwarranted chumminess with compiler. */ ++} regexp; ++ ++regexp * regcomp(char *exp, int *patternsize); ++int regexec(regexp *prog, char *string); ++void regsub(regexp *prog, char *source, char *dest); ++void regerror(char *s); ++ ++#endif +--- /dev/null ++++ b/net/netfilter/regexp/regmagic.h +@@ -0,0 +1,5 @@ ++/* ++ * The first byte of the regexp internal "program" is actually this magic ++ * number; the start node begins in the second byte. ++ */ ++#define MAGIC 0234 +--- /dev/null ++++ b/net/netfilter/regexp/regsub.c +@@ -0,0 +1,95 @@ ++/* ++ * regsub ++ * @(#)regsub.c 1.3 of 2 April 86 ++ * ++ * Copyright (c) 1986 by University of Toronto. ++ * Written by Henry Spencer. Not derived from licensed software. ++ * ++ * Permission is granted to anyone to use this software for any ++ * purpose on any computer system, and to redistribute it freely, ++ * subject to the following restrictions: ++ * ++ * 1. The author is not responsible for the consequences of use of ++ * this software, no matter how awful, even if they arise ++ * from defects in it. ++ * ++ * 2. The origin of this software must not be misrepresented, either ++ * by explicit claim or by omission. ++ * ++ * 3. Altered versions must be plainly marked as such, and must not ++ * be misrepresented as being the original software. ++ * ++ * ++ * This code was modified by Ethan Sommer to work within the kernel ++ * (it now uses kmalloc etc..) ++ * ++ */ ++#include "regexp.h" ++#include "regmagic.h" ++#include <linux/string.h> ++ ++ ++#ifndef CHARBITS ++#define UCHARAT(p) ((int)*(unsigned char *)(p)) ++#else ++#define UCHARAT(p) ((int)*(p)&CHARBITS) ++#endif ++ ++#if 0 ++//void regerror(char * s) ++//{ ++// printk("regexp(3): %s", s); ++// /* NOTREACHED */ ++//} ++#endif ++ ++/* ++ - regsub - perform substitutions after a regexp match ++ */ ++void ++regsub(regexp * prog, char * source, char * dest) ++{ ++ register char *src; ++ register char *dst; ++ register char c; ++ register int no; ++ register int len; ++ ++ /* Not necessary and gcc doesn't like it -MLS */ ++ /*extern char *strncpy();*/ ++ ++ if (prog == NULL || source == NULL || dest == NULL) { ++ regerror("NULL parm to regsub"); ++ return; ++ } ++ if (UCHARAT(prog->program) != MAGIC) { ++ regerror("damaged regexp fed to regsub"); ++ return; ++ } ++ ++ src = source; ++ dst = dest; ++ while ((c = *src++) != '\0') { ++ if (c == '&') ++ no = 0; ++ else if (c == '\\' && '0' <= *src && *src <= '9') ++ no = *src++ - '0'; ++ else ++ no = -1; ++ ++ if (no < 0) { /* Ordinary character. */ ++ if (c == '\\' && (*src == '\\' || *src == '&')) ++ c = *src++; ++ *dst++ = c; ++ } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { ++ len = prog->endp[no] - prog->startp[no]; ++ (void) strncpy(dst, prog->startp[no], len); ++ dst += len; ++ if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */ ++ regerror("damaged match string"); ++ return; ++ } ++ } ++ } ++ *dst++ = '\0'; ++} +--- /dev/null ++++ b/net/netfilter/xt_layer7.c +@@ -0,0 +1,666 @@ ++/* ++ Kernel module to match application layer (OSI layer 7) data in connections. ++ ++ http://l7-filter.sf.net ++ ++ (C) 2003-2009 Matthew Strait and Ethan Sommer. ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License ++ as published by the Free Software Foundation; either version ++ 2 of the License, or (at your option) any later version. ++ http://www.gnu.org/licenses/gpl.txt ++ ++ Based on ipt_string.c (C) 2000 Emmanuel Roger <winfield@freegates.be>, ++ xt_helper.c (C) 2002 Harald Welte and cls_layer7.c (C) 2003 Matthew Strait, ++ Ethan Sommer, Justin Levandoski. ++*/ ++ ++#include <linux/spinlock.h> ++#include <linux/version.h> ++#include <net/ip.h> ++#include <net/tcp.h> ++#include <linux/module.h> ++#include <linux/skbuff.h> ++#include <linux/netfilter.h> ++#include <net/netfilter/nf_conntrack.h> ++#include <net/netfilter/nf_conntrack_core.h> ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) ++#include <net/netfilter/nf_conntrack_extend.h> ++#include <net/netfilter/nf_conntrack_acct.h> ++#endif ++#include <linux/netfilter/x_tables.h> ++#include <linux/netfilter/xt_layer7.h> ++#include <linux/ctype.h> ++#include <linux/proc_fs.h> ++ ++#include "regexp/regexp.c" ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Matthew Strait <quadong@users.sf.net>, Ethan Sommer <sommere@users.sf.net>"); ++MODULE_DESCRIPTION("iptables application layer match module"); ++MODULE_ALIAS("ipt_layer7"); ++MODULE_VERSION("2.21"); ++ ++static int maxdatalen = 2048; // this is the default ++module_param(maxdatalen, int, 0444); ++MODULE_PARM_DESC(maxdatalen, "maximum bytes of data looked at by l7-filter"); ++#ifdef CONFIG_NETFILTER_XT_MATCH_LAYER7_DEBUG ++ #define DPRINTK(format,args...) printk(format,##args) ++#else ++ #define DPRINTK(format,args...) ++#endif ++ ++/* Number of packets whose data we look at. ++This can be modified through /proc/net/layer7_numpackets */ ++static int num_packets = 10; ++ ++static struct pattern_cache { ++ char * regex_string; ++ regexp * pattern; ++ struct pattern_cache * next; ++} * first_pattern_cache = NULL; ++ ++DEFINE_SPINLOCK(l7_lock); ++ ++static int total_acct_packets(struct nf_conn *ct) ++{ ++#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 26) ++ BUG_ON(ct == NULL); ++ return (ct->counters[IP_CT_DIR_ORIGINAL].packets + ct->counters[IP_CT_DIR_REPLY].packets); ++#else ++ struct nf_conn_counter *acct; ++ ++ BUG_ON(ct == NULL); ++ acct = nf_conn_acct_find(ct); ++ if (!acct) ++ return 0; ++ return (acct[IP_CT_DIR_ORIGINAL].packets + acct[IP_CT_DIR_REPLY].packets); ++#endif ++} ++ ++#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG ++/* Converts an unfriendly string into a friendly one by ++replacing unprintables with periods and all whitespace with " ". */ ++static char * friendly_print(unsigned char * s) ++{ ++ char * f = kmalloc(strlen(s) + 1, GFP_ATOMIC); ++ int i; ++ ++ if(!f) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "friendly_print, bailing.\n"); ++ return NULL; ++ } ++ ++ for(i = 0; i < strlen(s); i++){ ++ if(isprint(s[i]) && s[i] < 128) f[i] = s[i]; ++ else if(isspace(s[i])) f[i] = ' '; ++ else f[i] = '.'; ++ } ++ f[i] = '\0'; ++ return f; ++} ++ ++static char dec2hex(int i) ++{ ++ switch (i) { ++ case 0 ... 9: ++ return (i + '0'); ++ break; ++ case 10 ... 15: ++ return (i - 10 + 'a'); ++ break; ++ default: ++ if (net_ratelimit()) ++ printk("layer7: Problem in dec2hex\n"); ++ return '\0'; ++ } ++} ++ ++static char * hex_print(unsigned char * s) ++{ ++ char * g = kmalloc(strlen(s)*3 + 1, GFP_ATOMIC); ++ int i; ++ ++ if(!g) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in hex_print, " ++ "bailing.\n"); ++ return NULL; ++ } ++ ++ for(i = 0; i < strlen(s); i++) { ++ g[i*3 ] = dec2hex(s[i]/16); ++ g[i*3 + 1] = dec2hex(s[i]%16); ++ g[i*3 + 2] = ' '; ++ } ++ g[i*3] = '\0'; ++ ++ return g; ++} ++#endif // DEBUG ++ ++/* Use instead of regcomp. As we expect to be seeing the same regexps over and ++over again, it make sense to cache the results. */ ++static regexp * compile_and_cache(const char * regex_string, ++ const char * protocol) ++{ ++ struct pattern_cache * node = first_pattern_cache; ++ struct pattern_cache * last_pattern_cache = first_pattern_cache; ++ struct pattern_cache * tmp; ++ unsigned int len; ++ ++ while (node != NULL) { ++ if (!strcmp(node->regex_string, regex_string)) ++ return node->pattern; ++ ++ last_pattern_cache = node;/* points at the last non-NULL node */ ++ node = node->next; ++ } ++ ++ /* If we reach the end of the list, then we have not yet cached ++ the pattern for this regex. Let's do that now. ++ Be paranoid about running out of memory to avoid list corruption. */ ++ tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC); ++ ++ if(!tmp) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "compile_and_cache, bailing.\n"); ++ return NULL; ++ } ++ ++ tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC); ++ tmp->pattern = kmalloc(sizeof(struct regexp), GFP_ATOMIC); ++ tmp->next = NULL; ++ ++ if(!tmp->regex_string || !tmp->pattern) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "compile_and_cache, bailing.\n"); ++ kfree(tmp->regex_string); ++ kfree(tmp->pattern); ++ kfree(tmp); ++ return NULL; ++ } ++ ++ /* Ok. The new node is all ready now. */ ++ node = tmp; ++ ++ if(first_pattern_cache == NULL) /* list is empty */ ++ first_pattern_cache = node; /* make node the beginning */ ++ else ++ last_pattern_cache->next = node; /* attach node to the end */ ++ ++ /* copy the string and compile the regex */ ++ len = strlen(regex_string); ++ DPRINTK("About to compile this: \"%s\"\n", regex_string); ++ node->pattern = regcomp((char *)regex_string, &len); ++ if ( !node->pattern ) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: Error compiling regexp " ++ "\"%s\" (%s)\n", ++ regex_string, protocol); ++ /* pattern is now cached as NULL, so we won't try again. */ ++ } ++ ++ strcpy(node->regex_string, regex_string); ++ return node->pattern; ++} ++ ++static int can_handle(const struct sk_buff *skb) ++{ ++ if(!ip_hdr(skb)) /* not IP */ ++ return 0; ++ if(ip_hdr(skb)->protocol != IPPROTO_TCP && ++ ip_hdr(skb)->protocol != IPPROTO_UDP && ++ ip_hdr(skb)->protocol != IPPROTO_ICMP) ++ return 0; ++ return 1; ++} ++ ++/* Returns offset the into the skb->data that the application data starts */ ++static int app_data_offset(const struct sk_buff *skb) ++{ ++ /* In case we are ported somewhere (ebtables?) where ip_hdr(skb) ++ isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */ ++ int ip_hl = 4*ip_hdr(skb)->ihl; ++ ++ if( ip_hdr(skb)->protocol == IPPROTO_TCP ) { ++ /* 12 == offset into TCP header for the header length field. ++ Can't get this with skb->h.th->doff because the tcphdr ++ struct doesn't get set when routing (this is confirmed to be ++ true in Netfilter as well as QoS.) */ ++ int tcp_hl = 4*(skb->data[ip_hl + 12] >> 4); ++ ++ return ip_hl + tcp_hl; ++ } else if( ip_hdr(skb)->protocol == IPPROTO_UDP ) { ++ return ip_hl + 8; /* UDP header is always 8 bytes */ ++ } else if( ip_hdr(skb)->protocol == IPPROTO_ICMP ) { ++ return ip_hl + 8; /* ICMP header is 8 bytes */ ++ } else { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: tried to handle unknown " ++ "protocol!\n"); ++ return ip_hl + 8; /* something reasonable */ ++ } ++} ++ ++/* handles whether there's a match when we aren't appending data anymore */ ++static int match_no_append(struct nf_conn * conntrack, ++ struct nf_conn * master_conntrack, ++ enum ip_conntrack_info ctinfo, ++ enum ip_conntrack_info master_ctinfo, ++ const struct xt_layer7_info * info) ++{ ++ /* If we're in here, throw the app data away */ ++ if(master_conntrack->layer7.app_data != NULL) { ++ ++ #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG ++ if(!master_conntrack->layer7.app_proto) { ++ char * f = ++ friendly_print(master_conntrack->layer7.app_data); ++ char * g = ++ hex_print(master_conntrack->layer7.app_data); ++ DPRINTK("\nl7-filter gave up after %d bytes " ++ "(%d packets):\n%s\n", ++ strlen(f), total_acct_packets(master_conntrack), f); ++ kfree(f); ++ DPRINTK("In hex: %s\n", g); ++ kfree(g); ++ } ++ #endif ++ ++ kfree(master_conntrack->layer7.app_data); ++ master_conntrack->layer7.app_data = NULL; /* don't free again */ ++ } ++ ++ if(master_conntrack->layer7.app_proto){ ++ /* Here child connections set their .app_proto (for /proc) */ ++ if(!conntrack->layer7.app_proto) { ++ conntrack->layer7.app_proto = ++ kmalloc(strlen(master_conntrack->layer7.app_proto)+1, ++ GFP_ATOMIC); ++ if(!conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory " ++ "in match_no_append, " ++ "bailing.\n"); ++ return 1; ++ } ++ strcpy(conntrack->layer7.app_proto, ++ master_conntrack->layer7.app_proto); ++ } ++ ++ return (!strcmp(master_conntrack->layer7.app_proto, ++ info->protocol)); ++ } ++ else { ++ /* If not classified, set to "unknown" to distinguish from ++ connections that are still being tested. */ ++ master_conntrack->layer7.app_proto = ++ kmalloc(strlen("unknown")+1, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "match_no_append, bailing.\n"); ++ return 1; ++ } ++ strcpy(master_conntrack->layer7.app_proto, "unknown"); ++ return 0; ++ } ++} ++ ++/* add the new app data to the conntrack. Return number of bytes added. */ ++static int add_data(struct nf_conn * master_conntrack, ++ char * app_data, int appdatalen) ++{ ++ int length = 0, i; ++ int oldlength = master_conntrack->layer7.app_data_len; ++ ++ /* This is a fix for a race condition by Deti Fliegl. However, I'm not ++ clear on whether the race condition exists or whether this really ++ fixes it. I might just be being dense... Anyway, if it's not really ++ a fix, all it does is waste a very small amount of time. */ ++ if(!master_conntrack->layer7.app_data) return 0; ++ ++ /* Strip nulls. Make everything lower case (our regex lib doesn't ++ do case insensitivity). Add it to the end of the current data. */ ++ for(i = 0; i < maxdatalen-oldlength-1 && ++ i < appdatalen; i++) { ++ if(app_data[i] != '\0') { ++ /* the kernel version of tolower mungs 'upper ascii' */ ++ master_conntrack->layer7.app_data[length+oldlength] = ++ isascii(app_data[i])? ++ tolower(app_data[i]) : app_data[i]; ++ length++; ++ } ++ } ++ ++ master_conntrack->layer7.app_data[length+oldlength] = '\0'; ++ master_conntrack->layer7.app_data_len = length + oldlength; ++ ++ return length; ++} ++ ++/* taken from drivers/video/modedb.c */ ++static int my_atoi(const char *s) ++{ ++ int val = 0; ++ ++ for (;; s++) { ++ switch (*s) { ++ case '0'...'9': ++ val = 10*val+(*s-'0'); ++ break; ++ default: ++ return val; ++ } ++ } ++} ++ ++/* write out num_packets to userland. */ ++static int layer7_read_proc(char* page, char ** start, off_t off, int count, ++ int* eof, void * data) ++{ ++ if(num_packets > 99 && net_ratelimit()) ++ printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n"); ++ ++ page[0] = num_packets/10 + '0'; ++ page[1] = num_packets%10 + '0'; ++ page[2] = '\n'; ++ page[3] = '\0'; ++ ++ *eof=1; ++ ++ return 3; ++} ++ ++/* Read in num_packets from userland */ ++static int layer7_write_proc(struct file* file, const char* buffer, ++ unsigned long count, void *data) ++{ ++ char * foo = kmalloc(count, GFP_ATOMIC); ++ ++ if(!foo){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory, bailing. " ++ "num_packets unchanged.\n"); ++ return count; ++ } ++ ++ if(copy_from_user(foo, buffer, count)) { ++ return -EFAULT; ++ } ++ ++ ++ num_packets = my_atoi(foo); ++ kfree (foo); ++ ++ /* This has an arbitrary limit to make the math easier. I'm lazy. ++ But anyway, 99 is a LOT! If you want more, you're doing it wrong! */ ++ if(num_packets > 99) { ++ printk(KERN_WARNING "layer7: num_packets can't be > 99.\n"); ++ num_packets = 99; ++ } else if(num_packets < 1) { ++ printk(KERN_WARNING "layer7: num_packets can't be < 1.\n"); ++ num_packets = 1; ++ } ++ ++ return count; ++} ++ ++static bool ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++match(const struct sk_buff *skbin, const struct xt_match_param *par) ++#else ++match(const struct sk_buff *skbin, ++ const struct net_device *in, ++ const struct net_device *out, ++ const struct xt_match *match, ++ const void *matchinfo, ++ int offset, ++ unsigned int protoff, ++ bool *hotdrop) ++#endif ++{ ++ /* sidestep const without getting a compiler warning... */ ++ struct sk_buff * skb = (struct sk_buff *)skbin; ++ ++ const struct xt_layer7_info * info = ++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++ par->matchinfo; ++ #else ++ matchinfo; ++ #endif ++ ++ enum ip_conntrack_info master_ctinfo, ctinfo; ++ struct nf_conn *master_conntrack, *conntrack; ++ unsigned char * app_data; ++ unsigned int pattern_result, appdatalen; ++ regexp * comppattern; ++ ++ /* Be paranoid/incompetent - lock the entire match function. */ ++ spin_lock_bh(&l7_lock); ++ ++ if(!can_handle(skb)){ ++ DPRINTK("layer7: This is some protocol I can't handle.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ ++ /* Treat parent & all its children together as one connection, except ++ for the purpose of setting conntrack->layer7.app_proto in the actual ++ connection. This makes /proc/net/ip_conntrack more satisfying. */ ++ if(!(conntrack = nf_ct_get(skb, &ctinfo)) || ++ !(master_conntrack=nf_ct_get(skb,&master_ctinfo))){ ++ DPRINTK("layer7: couldn't get conntrack.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ ++ /* Try to get a master conntrack (and its master etc) for FTP, etc. */ ++ while (master_ct(master_conntrack) != NULL) ++ master_conntrack = master_ct(master_conntrack); ++ ++ /* if we've classified it or seen too many packets */ ++ if(total_acct_packets(master_conntrack) > num_packets || ++ master_conntrack->layer7.app_proto) { ++ ++ pattern_result = match_no_append(conntrack, master_conntrack, ++ ctinfo, master_ctinfo, info); ++ ++ /* skb->cb[0] == seen. Don't do things twice if there are ++ multiple l7 rules. I'm not sure that using cb for this purpose ++ is correct, even though it says "put your private variables ++ there". But it doesn't look like it is being used for anything ++ else in the skbs that make it here. */ ++ skb->cb[0] = 1; /* marking it seen here's probably irrelevant */ ++ ++ spin_unlock_bh(&l7_lock); ++ return (pattern_result ^ info->invert); ++ } ++ ++ if(skb_is_nonlinear(skb)){ ++ if(skb_linearize(skb) != 0){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: failed to linearize " ++ "packet, bailing.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ } ++ ++ /* now that the skb is linearized, it's safe to set these. */ ++ app_data = skb->data + app_data_offset(skb); ++ appdatalen = skb_tail_pointer(skb) - app_data; ++ ++ /* the return value gets checked later, when we're ready to use it */ ++ comppattern = compile_and_cache(info->pattern, info->protocol); ++ ++ /* On the first packet of a connection, allocate space for app data */ ++ if(total_acct_packets(master_conntrack) == 1 && !skb->cb[0] && ++ !master_conntrack->layer7.app_data){ ++ master_conntrack->layer7.app_data = ++ kmalloc(maxdatalen, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_data){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "match, bailing.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ ++ master_conntrack->layer7.app_data[0] = '\0'; ++ } ++ ++ /* Can be here, but unallocated, if numpackets is increased near ++ the beginning of a connection */ ++ if(master_conntrack->layer7.app_data == NULL){ ++ spin_unlock_bh(&l7_lock); ++ return info->invert; /* unmatched */ ++ } ++ ++ if(!skb->cb[0]){ ++ int newbytes; ++ newbytes = add_data(master_conntrack, app_data, appdatalen); ++ ++ if(newbytes == 0) { /* didn't add any data */ ++ skb->cb[0] = 1; ++ /* Didn't match before, not going to match now */ ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ } ++ ++ /* If looking for "unknown", then never match. "Unknown" means that ++ we've given up; we're still trying with these packets. */ ++ if(!strcmp(info->protocol, "unknown")) { ++ pattern_result = 0; ++ /* If looking for "unset", then always match. "Unset" means that we ++ haven't yet classified the connection. */ ++ } else if(!strcmp(info->protocol, "unset")) { ++ pattern_result = 2; ++ DPRINTK("layer7: matched unset: not yet classified " ++ "(%d/%d packets)\n", ++ total_acct_packets(master_conntrack), num_packets); ++ /* If the regexp failed to compile, don't bother running it */ ++ } else if(comppattern && ++ regexec(comppattern, master_conntrack->layer7.app_data)){ ++ DPRINTK("layer7: matched %s\n", info->protocol); ++ pattern_result = 1; ++ } else pattern_result = 0; ++ ++ if(pattern_result == 1) { ++ master_conntrack->layer7.app_proto = ++ kmalloc(strlen(info->protocol)+1, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "match, bailing.\n"); ++ spin_unlock_bh(&l7_lock); ++ return (pattern_result ^ info->invert); ++ } ++ strcpy(master_conntrack->layer7.app_proto, info->protocol); ++ } else if(pattern_result > 1) { /* cleanup from "unset" */ ++ pattern_result = 1; ++ } ++ ++ /* mark the packet seen */ ++ skb->cb[0] = 1; ++ ++ spin_unlock_bh(&l7_lock); ++ return (pattern_result ^ info->invert); ++} ++ ++// load nf_conntrack_ipv4 ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++static bool check(const struct xt_mtchk_param *par) ++{ ++ if (nf_ct_l3proto_try_module_get(par->match->family) < 0) { ++ printk(KERN_WARNING "can't load conntrack support for " ++ "proto=%d\n", par->match->family); ++#else ++static bool check(const char *tablename, const void *inf, ++ const struct xt_match *match, void *matchinfo, ++ unsigned int hook_mask) ++{ ++ if (nf_ct_l3proto_try_module_get(match->family) < 0) { ++ printk(KERN_WARNING "can't load conntrack support for " ++ "proto=%d\n", match->family); ++#endif ++ return 0; ++ } ++ return 1; ++} ++ ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++ static void destroy(const struct xt_mtdtor_param *par) ++ { ++ nf_ct_l3proto_module_put(par->match->family); ++ } ++#else ++ static void destroy(const struct xt_match *match, void *matchinfo) ++ { ++ nf_ct_l3proto_module_put(match->family); ++ } ++#endif ++ ++static struct xt_match xt_layer7_match[] __read_mostly = { ++{ ++ .name = "layer7", ++ .family = AF_INET, ++ .checkentry = check, ++ .match = match, ++ .destroy = destroy, ++ .matchsize = sizeof(struct xt_layer7_info), ++ .me = THIS_MODULE ++} ++}; ++ ++static void layer7_cleanup_proc(void) ++{ ++ remove_proc_entry("layer7_numpackets", init_net.proc_net); ++} ++ ++/* register the proc file */ ++static void layer7_init_proc(void) ++{ ++ struct proc_dir_entry* entry; ++ entry = create_proc_entry("layer7_numpackets", 0644, init_net.proc_net); ++ entry->read_proc = layer7_read_proc; ++ entry->write_proc = layer7_write_proc; ++} ++ ++static int __init xt_layer7_init(void) ++{ ++ need_conntrack(); ++ ++ layer7_init_proc(); ++ if(maxdatalen < 1) { ++ printk(KERN_WARNING "layer7: maxdatalen can't be < 1, " ++ "using 1\n"); ++ maxdatalen = 1; ++ } ++ /* This is not a hard limit. It's just here to prevent people from ++ bringing their slow machines to a grinding halt. */ ++ else if(maxdatalen > 65536) { ++ printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, " ++ "using 65536\n"); ++ maxdatalen = 65536; ++ } ++ return xt_register_matches(xt_layer7_match, ++ ARRAY_SIZE(xt_layer7_match)); ++} ++ ++static void __exit xt_layer7_fini(void) ++{ ++ layer7_cleanup_proc(); ++ xt_unregister_matches(xt_layer7_match, ARRAY_SIZE(xt_layer7_match)); ++} ++ ++module_init(xt_layer7_init); ++module_exit(xt_layer7_fini); + |
#vim 101-netfilter_layer7_pktmatch.patch
--- /dev/null 2010-02-26 12:46:03.368409969 +0000 +++ 101-netfilter_layer7_pktmatch.patch 2010-02-26 15:15:10.000000000 +0000 @@ -0,0 +1,109 @@ +--- a/include/linux/netfilter/xt_layer7.h ++++ b/include/linux/netfilter/xt_layer7.h +@@ -8,6 +8,7 @@ struct xt_layer7_info { + char protocol[MAX_PROTOCOL_LEN]; + char pattern[MAX_PATTERN_LEN]; + u_int8_t invert; ++ u_int8_t pkt; + }; + + #endif /* _XT_LAYER7_H */ +--- a/net/netfilter/xt_layer7.c ++++ b/net/netfilter/xt_layer7.c +@@ -314,33 +314,35 @@ static int match_no_append(struct nf_con + } + + /* add the new app data to the conntrack. Return number of bytes added. */ +-static int add_data(struct nf_conn * master_conntrack, +- char * app_data, int appdatalen) ++static int add_datastr(char *target, int offset, char *app_data, int len) + { + int length = 0, i; +- int oldlength = master_conntrack->layer7.app_data_len; +- +- /* This is a fix for a race condition by Deti Fliegl. However, I'm not +- clear on whether the race condition exists or whether this really +- fixes it. I might just be being dense... Anyway, if it's not really +- a fix, all it does is waste a very small amount of time. */ +- if(!master_conntrack->layer7.app_data) return 0; ++ if (!target) return 0; + + /* Strip nulls. Make everything lower case (our regex lib doesn't + do case insensitivity). Add it to the end of the current data. */ +- for(i = 0; i < maxdatalen-oldlength-1 && +- i < appdatalen; i++) { ++ for(i = 0; i < maxdatalen-offset-1 && i < len; i++) { + if(app_data[i] != '\0') { + /* the kernel version of tolower mungs 'upper ascii' */ +- master_conntrack->layer7.app_data[length+oldlength] = ++ target[length+offset] = + isascii(app_data[i])? + tolower(app_data[i]) : app_data[i]; + length++; + } + } ++ target[length+offset] = '\0'; ++ ++ return length; ++} ++ ++/* add the new app data to the conntrack. Return number of bytes added. */ ++static int add_data(struct nf_conn * master_conntrack, ++ char * app_data, int appdatalen) ++{ ++ int length; + +- master_conntrack->layer7.app_data[length+oldlength] = '\0'; +- master_conntrack->layer7.app_data_len = length + oldlength; ++ length = add_datastr(master_conntrack->layer7.app_data, master_conntrack->layer7.app_data_len, app_data, appdatalen); ++ master_conntrack->layer7.app_data_len += length; + + return length; + } +@@ -438,7 +440,7 @@ match(const struct sk_buff *skbin, + + enum ip_conntrack_info master_ctinfo, ctinfo; + struct nf_conn *master_conntrack, *conntrack; +- unsigned char * app_data; ++ unsigned char *app_data, *tmp_data; + unsigned int pattern_result, appdatalen; + regexp * comppattern; + +@@ -466,8 +468,8 @@ match(const struct sk_buff *skbin, + master_conntrack = master_ct(master_conntrack); + + /* if we've classified it or seen too many packets */ +- if(total_acct_packets(master_conntrack) > num_packets || +- master_conntrack->layer7.app_proto) { ++ if(!info->pkt && (total_acct_packets(master_conntrack) > num_packets || ++ master_conntrack->layer7.app_proto)) { + + pattern_result = match_no_append(conntrack, master_conntrack, + ctinfo, master_ctinfo, info); +@@ -500,6 +502,25 @@ match(const struct sk_buff *skbin, + /* the return value gets checked later, when we're ready to use it */ + comppattern = compile_and_cache(info->pattern, info->protocol); + ++ if (info->pkt) { ++ tmp_data = kmalloc(maxdatalen, GFP_ATOMIC); ++ if(!tmp_data){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in match, bailing.\n"); ++ return info->invert; ++ } ++ ++ tmp_data[0] = '\0'; ++ add_datastr(tmp_data, 0, app_data, appdatalen); ++ pattern_result = ((comppattern && regexec(comppattern, tmp_data)) ? 1 : 0); ++ ++ kfree(tmp_data); ++ tmp_data = NULL; ++ spin_unlock_bh(&l7_lock); ++ ++ return (pattern_result ^ info->invert); ++ } ++ + /* On the first packet of a connection, allocate space for app data */ + if(total_acct_packets(master_conntrack) == 1 && !skb->cb[0] && + !master_conntrack->layer7.app_data){ + |
#ls
100-netfilter_layer7_2.21.patch iptables-1.4.3.2-imq_xt.patch 101-netfilter_layer7_pktmatch.patch linux-2.6.32.1.tar.bz2 150-netfilter_imq.patch linux-2.6.32.3.tar.bz2 200-sched_esfq linux-2.6.32-imq-test2.diff esfq-maybe.patch linux-2.6.32-imq-test2.diff.bz2 esfq-mypatch linux-2.6.33.tar.bz2 esfq.patch linux-source-2.6.26 esfq-patch-2.6.26 linux-source-2.6.26-copy.tar.bz2 esfq-patch-2.6.26.was linux-source-2.6.26.tar.bz2 iptables-1.4.3.2-imq.patch sched_esfq-2.6.29.patch |
#cd linux-2.6.32.3/
|
#patch -p1 -b <../
100-netfilter_layer7_2.21.patch iptables-1.4.3.2-imq_xt.patch 101-netfilter_layer7_pktmatch.patch linux-2.6.32.1.tar.bz2 150-netfilter_imq.patch linux-2.6.32.3/ 200-sched_esfq linux-2.6.32.3.tar.bz2 .config.2.6.32.3 linux-2.6.32-imq-test2.diff esfq-maybe.patch linux-2.6.32-imq-test2.diff.bz2 esfq-mypatch linux-2.6.33.tar.bz2 esfq.patch linux-source-2.6.26/ esfq-patch-2.6.26 linux-source-2.6.26-copy.tar.bz2 esfq-patch-2.6.26.was linux-source-2.6.26.tar.bz2 iptables-1.4.3.2-imq.patch sched_esfq-2.6.29.patch |
#patch -p1 -b <../100-netfilter_layer7_2.21.patch
patching file include/linux/netfilter/xt_layer7.h patching file include/net/netfilter/nf_conntrack.h patching file net/netfilter/Kconfig patching file net/netfilter/Makefile patching file net/netfilter/nf_conntrack_core.c patching file net/netfilter/nf_conntrack_standalone.c patching file net/netfilter/regexp/regexp.c patching file net/netfilter/regexp/regexp.h patching file net/netfilter/regexp/regmagic.h patching file net/netfilter/regexp/regsub.c patching file net/netfilter/xt_layer7.c |
#patch -p1 -b <../101-netfilter_layer7_pktmatch.patch
patching file include/linux/netfilter/xt_layer7.h patching file net/netfilter/xt_layer7.c |
#patch -p1 -b <../
100-netfilter_layer7_2.21.patch iptables-1.4.3.2-imq_xt.patch 101-netfilter_layer7_pktmatch.patch linux-2.6.32.1.tar.bz2 150-netfilter_imq.patch linux-2.6.32.3/ 200-sched_esfq linux-2.6.32.3.tar.bz2 .config.2.6.32.3 linux-2.6.32-imq-test2.diff esfq-maybe.patch linux-2.6.32-imq-test2.diff.bz2 esfq-mypatch linux-2.6.33.tar.bz2 esfq.patch linux-source-2.6.26/ esfq-patch-2.6.26 linux-source-2.6.26-copy.tar.bz2 esfq-patch-2.6.26.was linux-source-2.6.26.tar.bz2 iptables-1.4.3.2-imq.patch sched_esfq-2.6.29.patch |
#patch -p1 -b <../150-netfilter_imq.patch
patching file drivers/net/imq.c patching file drivers/net/Kconfig patching file drivers/net/Makefile patching file include/linux/imq.h patching file include/linux/netdevice.h patching file include/linux/netfilter/xt_IMQ.h patching file include/linux/netfilter_ipv4/ipt_IMQ.h patching file include/linux/netfilter_ipv6/ip6t_IMQ.h patching file include/linux/skbuff.h patching file include/net/netfilter/nf_queue.h patching file net/core/dev.c patching file net/core/skbuff.c patching file net/netfilter/Kconfig patching file net/netfilter/Makefile patching file net/netfilter/nf_queue.c patching file net/netfilter/xt_IMQ.c |
#patch -p1 -b <../200-sched_esfq
patching file include/linux/pkt_sched.h patching file net/sched/Kconfig patching file net/sched/Makefile patching file net/sched/sch_esfq.c |
#cp ../.config.2.6.32.3 ./.config
|
#make
HOSTCC scripts/basic/fixdep HOSTCC scripts/basic/docproc HOSTCC scripts/basic/hash HOSTCC scripts/kconfig/conf.o HOSTCC scripts/kconfig/kxgettext.o SHIPPED scripts/kconfig/zconf.tab.c SHIPPED scripts/kconfig/lex.zconf.c SHIPPED scripts/kconfig/zconf.hash.c HOSTCC scripts/kconfig/zconf.tab.o HOSTLD scripts/kconfig/conf ... CC net/sched/sch_prio.mod.o LD [M] net/sched/sch_prio.ko CC net/sched/sch_red.mod.o LD [M] net/sched/sch_red.ko CC net/sched/sch_sfq.mod.o LD [M] net/sched/sch_sfq.ko CC net/sched/sch_tbf.mod.o LD [M] net/sched/sch_tbf.ko CC net/sched/sch_teql.mod.o LD [M] net/sched/sch_teql.ko |
#ls
ChangeLog ezshaper-1.1rc.tar.gz iproute2-2.6.x-esfq.diff.tar.gz esfq-0.3 iproute2-2.6.33 iptables-1.4.4.tar.bz2 esfq-0.3.tar.gz iproute2-2.6.33.tar.bz2 lilalo ezshaper-1.1rc iproute2-2.6.x-esfq.diff README |
#tar xvf iptables-1.4.4.tar.bz2
iptables-1.4.4/ iptables-1.4.4/ip6tables.8.in iptables-1.4.4/ip6tables-restore.c iptables-1.4.4/xtables.c iptables-1.4.4/iptables-multi.c iptables-1.4.4/ip6tables-restore.8 iptables-1.4.4/ip6tables-save.c iptables-1.4.4/ip6tables-multi.h iptables-1.4.4/iptables-apply.8 iptables-1.4.4/iptables-multi.h ... iptables-1.4.4/release.sh iptables-1.4.4/iptables-save.c iptables-1.4.4/iptables.8.in iptables-1.4.4/ip6tables-save.8 iptables-1.4.4/configure iptables-1.4.4/missing iptables-1.4.4/config.sub iptables-1.4.4/.gitignore iptables-1.4.4/iptables-restore.c iptables-1.4.4/ip6tables-multi.c |
#ls
ChangeLog iproute2-2.6.33 iptables-1.4.4.tar.bz2 esfq-0.3 iproute2-2.6.33.tar.bz2 lilalo esfq-0.3.tar.gz iproute2-2.6.x-esfq.diff README ezshaper-1.1rc iproute2-2.6.x-esfq.diff.tar.gz ezshaper-1.1rc.tar.gz iptables-1.4.4 |
#cd iptables-1.4.4/
|
#patch -p1 -b <../iptables-1.4.4-imq.diff
patching file extensions/libxt_IMQ.c patching file include/linux/netfilter/xt_IMQ.h |
#./configure
checking for a BSD-compatible install... /usr/bin/install -c checking whether build environment is sane... yes checking for gawk... no checking for mawk... mawk checking whether make sets $(MAKE)... yes checking for gcc... gcc checking for C compiler default output file name... a.out checking whether the C compiler works... yes checking whether we are cross compiling... no checking for suffix of executables... ... config.status: creating extensions/GNUmakefile config.status: creating include/Makefile config.status: creating libipq/Makefile config.status: creating include/xtables.h config.status: creating include/iptables/internal.h config.status: creating libiptc.pc config.status: creating xtables.pc config.status: creating config.h config.status: executing depfiles commands config.status: executing libtool commands |
#make
make all-recursive make[1]: Entering directory `/var/downloads/iptables-1.4.4' Making all in extensions make[2]: Entering directory `/var/downloads/iptables-1.4.4/extensions' GEN initext4.c CC initext4.o AR libext4.a GEN initext6.c CC initext6.o AR libext6.a ... libtool: link: gcc -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"/usr/local/libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -g -O2 -o .libs/ip6tables-restore -rdynamic ip6tables-restore.o ip6tables.o libiptc/. depbase=`echo ip6tables-save.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`; \ if gcc -DHAVE_CONFIG_H -I. -I. -I. -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"/usr/local/libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -g -O2 -MT ip6tables-save.o -MD -MP -MF "$de then mv -f "$depbase.Tpo" "$depbase.Po"; else rm -f "$depbase.Tpo"; exit 1; fi /bin/sh ./libtool --tag=CC --mode=link gcc -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"/usr/local/libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -g -O2 -o ip6tables-save -rdynamic ip6tables-save. libtool: link: gcc -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"/usr/local/libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -g -O2 -o .libs/ip6tables-save -rdynamic ip6tables-save.o ip6tables.o libiptc/.libs/l sed -e 's/@PACKAGE_AND_VERSION@/iptables 1.4.4/g' -e '/@MATCH@/ r extensions/matches4.man' -e '/@TARGET@/ r extensions/targets4.man' iptables.8.in >iptables.8; sed -e 's/@PACKAGE_AND_VERSION@/iptables 1.4.4/g' -e '/@MATCH@/ r extensions/matches6.man' -e '/@TARGET@/ r extensions/targets6.man' ip6tables.8.in >ip6tables.8; make[2]: Leaving directory `/var/downloads/iptables-1.4.4' make[1]: Leaving directory `/var/downloads/iptables-1.4.4' |
#./configure --with-ksource=/usr/src/linux-2.6.32
linux-2.6.32.1.tar.bz2 linux-2.6.32-imq-test2.diff linux-2.6.32.3/ linux-2.6.32-imq-test2.diff.bz2 linux-2.6.32.3.tar.bz2 |
#./configure --with-ksource=/usr/src/linux-2.6.32
linux-2.6.32.1.tar.bz2 linux-2.6.32-imq-test2.diff linux-2.6.32.3/ linux-2.6.32-imq-test2.diff.bz2 linux-2.6.32.3.tar.bz2 |
#./configure --with-ksource=/usr/src/linux-2.6.32.3/
arch/ Makefile .tmp_kallsyms3.o block/ .missing-syscalls.d ..tmp_kallsyms3.o.cmd .config mm/ .tmp_kallsyms3.S COPYING modules.order .tmp_System.map CREDITS Module.symvers .tmp_versions/ crypto/ net/ .tmp_vmlinux1 Documentation/ README ..tmp_vmlinux1.cmd drivers/ REPORTING-BUGS .tmp_vmlinux2 firmware/ samples/ ..tmp_vmlinux2.cmd fs/ scripts/ .tmp_vmlinux3 .gitignore security/ ..tmp_vmlinux3.cmd include/ sound/ tools/ init/ System.map usr/ ipc/ .tmp_kallsyms1.o .version Kbuild ..tmp_kallsyms1.o.cmd virt/ kernel/ .tmp_kallsyms1.S vmlinux lib/ .tmp_kallsyms2.o .vmlinux.cmd .mailmap ..tmp_kallsyms2.o.cmd vmlinux.o MAINTAINERS .tmp_kallsyms2.S .vmlinux.o.cmd |
#./configure --with-ksource=/usr/src/linux-2.6.32.3/
checking for a BSD-compatible install... /usr/bin/install -c checking whether build environment is sane... yes checking for gawk... no checking for mawk... mawk checking whether make sets $(MAKE)... yes checking for gcc... gcc checking for C compiler default output file name... a.out checking whether the C compiler works... yes checking whether we are cross compiling... no checking for suffix of executables... ... config.status: creating include/Makefile config.status: creating libipq/Makefile config.status: creating include/xtables.h config.status: creating include/iptables/internal.h config.status: creating libiptc.pc config.status: creating xtables.pc config.status: creating config.h config.status: config.h is unchanged config.status: executing depfiles commands config.status: executing libtool commands |
#make modules
CHK include/linux/version.h CHK include/linux/utsrelease.h SYMLINK include/asm -> include/asm-x86 CALL scripts/checksyscalls.sh Building modules, stage 2. MODPOST 76 modules |
#make
make all-recursive make[1]: Entering directory `/var/downloads/iptables-1.4.4' Making all in extensions make[2]: Entering directory `/var/downloads/iptables-1.4.4/extensions' CC libxt_CLASSIFY.oo CCLD libxt_CLASSIFY.so CC libxt_cluster.oo CCLD libxt_cluster.so CC libxt_comment.oo CCLD libxt_comment.so ... In file included from /usr/include/asm/types.h:4, from ./include/linux/types.h:6, from ./include/libiptc/libiptc.h:5, from libiptc/libip4tc.c:29: /usr/src/linux-2.6.32.3//include/asm-generic/int-ll64.h:11:29: error: asm/bitsperlong.h: Нет такого файла или каталога make[2]: *** [libiptc/libip4tc.lo] Ошибка 1 make[2]: Leaving directory `/var/downloads/iptables-1.4.4' make[1]: *** [all-recursive] Ошибка 1 make[1]: Leaving directory `/var/downloads/iptables-1.4.4' make: *** [all] Ошибка 2 |
#as bitsperlong
|
#make modules_install
INSTALL arch/x86/kernel/test_nx.ko INSTALL arch/x86/kvm/kvm-amd.ko INSTALL arch/x86/kvm/kvm-intel.ko INSTALL arch/x86/kvm/kvm.ko INSTALL drivers/char/hw_random/amd-rng.ko INSTALL drivers/net/dummy.ko INSTALL drivers/net/ppp_generic.ko INSTALL drivers/net/pppoe.ko INSTALL drivers/net/pppox.ko INSTALL drivers/net/slhc.ko ... INSTALL net/sched/sch_hfsc.ko INSTALL net/sched/sch_htb.ko INSTALL net/sched/sch_ingress.ko INSTALL net/sched/sch_netem.ko INSTALL net/sched/sch_prio.ko INSTALL net/sched/sch_red.ko INSTALL net/sched/sch_sfq.ko INSTALL net/sched/sch_tbf.ko INSTALL net/sched/sch_teql.ko DEPMOD 2.6.32.3 |
#mkinitramfs
|
#mkinitramfs -o /boot/initrd-2.6.32.3-esfq-imq
|
#ls /usr/src/linux-2.6.32.3/include/asm-generic/bit
bitops/ bitops.h bitsperlong.h |
#ls /usr/src/linux-2.6.32.3/include/asm-generic/bit
bitops/ bitops.h bitsperlong.h |
#ls /usr/src/linux-2.6.32.3/include/asm
asm-offsets.h |
# ls /usr/src/linux-2.6.32.3/include/asm
make make all-recursive make[1]: Entering directory `/var/downloads/iptables-1.4.4' Making all in extensions make[2]: Entering directory `/var/downloads/iptables-1.4.4/extensions' make[2]: Leaving directory `/var/downloads/iptables-1.4.4/extensions' Making all in include make[2]: Entering directory `/var/downloads/iptables-1.4.4/include' make[2]: Цель `all' не требует выполнения команд. make[2]: Leaving directory `/var/downloads/iptables-1.4.4/include' ... In file included from /usr/include/asm/types.h:4, from ./include/linux/types.h:6, from ./include/libiptc/libiptc.h:5, from libiptc/libip4tc.c:29: /usr/src/linux-2.6.32.3//include/asm-generic/int-ll64.h:11:29: error: asm/bitsperlong.h: Нет такого файла или каталога make[2]: *** [libiptc/libip4tc.lo] Ошибка 1 make[2]: Leaving directory `/var/downloads/iptables-1.4.4' make[1]: *** [all-recursive] Ошибка 1 make[1]: Leaving directory `/var/downloads/iptables-1.4.4' make: *** [all] Ошибка 2 |
# ls /usr/src/linux-2.6.32.3/include/asm
./configure --with-ksource=/usr/src/linux-2.6.32.3 checking for a BSD-compatible install... /usr/bin/install -c checking whether build environment is sane... yes checking for gawk... no checking for mawk... mawk checking whether make sets $(MAKE)... yes checking for gcc... gcc checking for C compiler default output file name... a.out checking whether the C compiler works... yes checking whether we are cross compiling... no ... config.status: creating include/Makefile config.status: creating libipq/Makefile config.status: creating include/xtables.h config.status: creating include/iptables/internal.h config.status: creating libiptc.pc config.status: creating xtables.pc config.status: creating config.h config.status: config.h is unchanged config.status: executing depfiles commands config.status: executing libtool commands |
#make
make all-recursive make[1]: Entering directory `/var/downloads/iptables-1.4.4' Making all in extensions make[2]: Entering directory `/var/downloads/iptables-1.4.4/extensions' GEN initext4.c CC initext4.o AR libext4.a GEN initext6.c CC initext6.o AR libext6.a ... In file included from /usr/include/asm/types.h:4, from ./include/linux/types.h:6, from ./include/libiptc/libiptc.h:5, from libiptc/libip4tc.c:29: /usr/src/linux-2.6.32.3/include/asm-generic/int-ll64.h:11:29: error: asm/bitsperlong.h: Нет такого файла или каталога make[2]: *** [libiptc/libip4tc.lo] Ошибка 1 make[2]: Leaving directory `/var/downloads/iptables-1.4.4' make[1]: *** [all-recursive] Ошибка 1 make[1]: Leaving directory `/var/downloads/iptables-1.4.4' make: *** [all] Ошибка 2 |
#ls
aclocal.m4 include iptables-apply.8 libiptc.pc.in autogen.sh INCOMPATIBILITIES iptables.c libtool COMMIT_NOTES INSTALL iptables-multi.c ltmain.sh compile install-sh iptables-multi.h m4 config.guess ip6tables.8.in iptables-restore.8 Makefile config.h ip6tables.c iptables-restore.c Makefile.am config.h.in ip6tables-multi.c iptables-save.8 Makefile.in config.log ip6tables-multi.h iptables-save.c missing config.status ip6tables-restore.8 iptables-standalone.c release.sh config.sub ip6tables-restore.c iptables-xml.8 stamp-h1 configure ip6tables-save.8 iptables-xml.c xtables.c configure.ac ip6tables-save.c iptables.xslt xtables.pc COPYING ip6tables-standalone.c libipq xtables.pc.in depcomp iptables.8.in libiptc extensions iptables-apply libiptc.pc |
#pwd
/var/downloads/iptables-1.4.4 |
#ls /usr/src/linux-2.6.32.3/include/asm-generic
4level-fixup.h ide_iops.h posix_types.h asm int-l64.h resource.h asm-offsets.h int-ll64.h rtc.h atomic64.h ioctl.h scatterlist.h atomic.h ioctls.h sections.h atomic-long.h io.h segment.h audit_change_attr.h iomap.h sembuf.h audit_dir_write.h ipcbuf.h serial.h audit_read.h irqflags.h setup.h audit_signal.h irq.h shmbuf.h ... emergency-restart.h param.h types.h errno-base.h parport.h uaccess.h errno.h pci-dma-compat.h uaccess-unaligned.h fb.h pci.h ucontext.h fcntl.h percpu.h unaligned.h futex.h pgalloc.h unistd.h getorder.h pgtable.h user.h gpio.h pgtable-nopmd.h vga.h hardirq.h pgtable-nopud.h vmlinux.lds.h hw_irq.h poll.h xor.h |
#ls -al /usr/src/linux-2.6.32.3/include/asm-
asm-generic/ asm-was/ asm-x86/ |
#ls -al /usr/src/linux-2.6.32.3/include/asm-
|
#make
make all-recursive make[1]: Entering directory `/var/downloads/iptables-1.4.4' Making all in extensions make[2]: Entering directory `/var/downloads/iptables-1.4.4/extensions' make[2]: Leaving directory `/var/downloads/iptables-1.4.4/extensions' Making all in include make[2]: Entering directory `/var/downloads/iptables-1.4.4/include' make[2]: Цель `all' не требует выполнения команд. make[2]: Leaving directory `/var/downloads/iptables-1.4.4/include' make[2]: Entering directory `/var/downloads/iptables-1.4.4' ... In file included from /usr/include/asm/types.h:4, from ./include/linux/types.h:6, from ./include/libiptc/libiptc.h:5, from libiptc/libip4tc.c:29: /usr/src/linux-2.6.32.3/include/asm-generic/int-ll64.h:11:29: error: asm/bitsperlong.h: Нет такого файла или каталога make[2]: *** [libiptc/libip4tc.lo] Ошибка 1 make[2]: Leaving directory `/var/downloads/iptables-1.4.4' make[1]: *** [all-recursive] Ошибка 1 make[1]: Leaving directory `/var/downloads/iptables-1.4.4' make: *** [all] Ошибка 2 |
#make
make all-recursive make[1]: Entering directory `/var/downloads/iptables-1.4.4' Making all in extensions make[2]: Entering directory `/var/downloads/iptables-1.4.4/extensions' make[2]: Leaving directory `/var/downloads/iptables-1.4.4/extensions' Making all in include make[2]: Entering directory `/var/downloads/iptables-1.4.4/include' make[2]: Цель `all' не требует выполнения команд. make[2]: Leaving directory `/var/downloads/iptables-1.4.4/include' make[2]: Entering directory `/var/downloads/iptables-1.4.4' ... In file included from /usr/include/asm/types.h:4, from ./include/linux/types.h:6, from ./include/libiptc/libiptc.h:5, from libiptc/libip4tc.c:29: /usr/src/linux-2.6.32.3/include/asm-generic/int-ll64.h:14:25: error: bitsperlong.h: Нет такого файла или каталога make[2]: *** [libiptc/libip4tc.lo] Ошибка 1 make[2]: Leaving directory `/var/downloads/iptables-1.4.4' make[1]: *** [all-recursive] Ошибка 1 make[1]: Leaving directory `/var/downloads/iptables-1.4.4' make: *** [all] Ошибка 2 |
#ls -al asm
ls: невозможно получить доступ к asm: Нет такого файла или каталога |
#rm /usr/src/linux-2.6.32.3/include/asm-generic/asm
|
#make
"/usr/src/linux-2.6.32.3/include/asm-generic/int-ll64.h" 81L, 1508C */ #include <bitsperlong.h> #ifndef __ASSEMBLY__ #include <asm/bitsperlong.h> */ typedef __signed__ char __s8; typedef __signed__ short __s16; typedef unsigned short __u16; "/usr/src/linux-2.6.32.3/include/asm-generic/int-ll64.h" 78L, 1473C written |
#make
make all-recursive make[1]: Entering directory `/var/downloads/iptables-1.4.4' Making all in extensions make[2]: Entering directory `/var/downloads/iptables-1.4.4/extensions' make[2]: Leaving directory `/var/downloads/iptables-1.4.4/extensions' Making all in include make[2]: Entering directory `/var/downloads/iptables-1.4.4/include' make[2]: Цель `all' не требует выполнения команд. make[2]: Leaving directory `/var/downloads/iptables-1.4.4/include' make[2]: Entering directory `/var/downloads/iptables-1.4.4' ... libtool: link: gcc -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"/usr/local/libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -I /usr/src/linux-2.6.32.3/include -g -O2 -o .libs/ip6tables-restore -rdynamic ip6tabl depbase=`echo ip6tables-save.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`; \ if gcc -DHAVE_CONFIG_H -I. -I. -I. -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"/usr/local/libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -I /usr/src/linux-2.6.32.3/include -g -O2 -MT then mv -f "$depbase.Tpo" "$depbase.Po"; else rm -f "$depbase.Tpo"; exit 1; fi /bin/sh ./libtool --tag=CC --mode=link gcc -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"/usr/local/libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -I /usr/src/linux-2.6.32.3/include -g -O2 -o ip6tab libtool: link: gcc -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"/usr/local/libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -I /usr/src/linux-2.6.32.3/include -g -O2 -o .libs/ip6tables-save -rdynamic ip6tables- sed -e 's/@PACKAGE_AND_VERSION@/iptables 1.4.4/g' -e '/@MATCH@/ r extensions/matches4.man' -e '/@TARGET@/ r extensions/targets4.man' iptables.8.in >iptables.8; sed -e 's/@PACKAGE_AND_VERSION@/iptables 1.4.4/g' -e '/@MATCH@/ r extensions/matches6.man' -e '/@TARGET@/ r extensions/targets6.man' ip6tables.8.in >ip6tables.8; make[2]: Leaving directory `/var/downloads/iptables-1.4.4' make[1]: Leaving directory `/var/downloads/iptables-1.4.4' |
#make install
Making install in extensions make[1]: Entering directory `/var/downloads/iptables-1.4.4/extensions' if test -n " libxt_CLASSIFY.so libxt_cluster.so libxt_comment.so libxt_connbytes.so libxt_connlimit.so libxt_connmark.so libxt_CONNMARK.so libxt_CONNSECMARK.so libxt_conntrack.so libxt_dccp.so libxt_dscp.so libxt_DSCP.so libxt_esp.so libxt_hashlimit.so libxt_helper.so libxt_IMQ.so libxt_iprange.so libxt_length.so libxt_limit.so libxt_mac.so libxt_mark.so libxt_MARK.so libxt_multiport.so libxt_NFLO make[1]: Leaving directory `/var/downloads/iptables-1.4.4/extensions' Making install in include make[1]: Entering directory `/var/downloads/iptables-1.4.4/include' make[2]: Entering directory `/var/downloads/iptables-1.4.4/include' make[2]: Цель `install-exec-am' не требует выполнения команд. test -z "/usr/local/include" || mkdir -p -- "/usr/local/include" test -z "/usr/local/include" || mkdir -p -- "/usr/local/include" ... /usr/bin/install -c -m 644 './iptables-save.8' '/usr/local/share/man/man8/iptables-save.8' /usr/bin/install -c -m 644 './iptables-xml.8' '/usr/local/share/man/man8/iptables-xml.8' /usr/bin/install -c -m 644 './ip6tables.8' '/usr/local/share/man/man8/ip6tables.8' /usr/bin/install -c -m 644 './ip6tables-restore.8' '/usr/local/share/man/man8/ip6tables-restore.8' /usr/bin/install -c -m 644 './ip6tables-save.8' '/usr/local/share/man/man8/ip6tables-save.8' test -z "/usr/local/lib/pkgconfig" || mkdir -p -- "/usr/local/lib/pkgconfig" /usr/bin/install -c -m 644 'libiptc.pc' '/usr/local/lib/pkgconfig/libiptc.pc' /usr/bin/install -c -m 644 'xtables.pc' '/usr/local/lib/pkgconfig/xtables.pc' make[2]: Leaving directory `/var/downloads/iptables-1.4.4' make[1]: Leaving directory `/var/downloads/iptables-1.4.4' |
#iptables -V
iptables v1.4.2 |
#whereis iptables
iptables: /sbin/iptables /usr/local/sbin/iptables /usr/share/iptables /usr/share/man/man8/iptables.8.gz |
#/usr/local/sbin/iptables -V
iptables v1.4.4 |
#make uninstall
Making uninstall in extensions make[1]: Entering directory `/var/downloads/iptables-1.4.4/extensions' make[1]: *** Нет правила для сборки цели `uninstall'. Останов. make[1]: Leaving directory `/var/downloads/iptables-1.4.4/extensions' make: *** [uninstall-recursive] Ошибка 1 |
#make clean
Making clean in include make[1]: Entering directory `/var/downloads/iptables-1.4.4/include' rm -rf .libs _libs rm -f *.lo make[1]: Leaving directory `/var/downloads/iptables-1.4.4/include' Making clean in extensions make[1]: Entering directory `/var/downloads/iptables-1.4.4/extensions' rm -f *.o *.oo *.so *.a {matches,targets}[46].man initext4.c initext6.c; make[1]: Leaving directory `/var/downloads/iptables-1.4.4/extensions' Making clean in . ... rm -f ip6tables-multi ip6tables-multi rm -f ip6tables-restore ip6tables-restore rm -f ip6tables-save ip6tables-save rm -f *.o rm -f libiptc/libip4tc.o rm -f libiptc/libip4tc.lo rm -f libiptc/libip6tc.o rm -f libiptc/libip6tc.lo rm -f *.lo make[1]: Leaving directory `/var/downloads/iptables-1.4.4' |
#./configure --help
`configure' configures iptables 1.4.4 to adapt to many kinds of systems. Usage: ./configure [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit ... CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a nonstandard directory <lib dir> LIBS libraries to pass to the linker, e.g. -l<library> CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I<include dir> if you have headers in a nonstandard directory <include dir> CPP C preprocessor Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. |
#less
|
#less INSTALL
|
#make
make all-recursive make[1]: Entering directory `/var/downloads/iptables-1.4.4' Making all in extensions make[2]: Entering directory `/var/downloads/iptables-1.4.4/extensions' GEN initext4.c CC initext4.o AR libext4.a GEN initext6.c CC initext6.o AR libext6.a ... libtool: link: gcc -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"//libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -I /usr/src/linux-2.6.32.3/include -I /usr/src/linux-2.6.32.3/include -g -O2 -o .libs/ip6tables depbase=`echo ip6tables-save.o | sed 's|[^/]*$|.deps/&|;s|\.o$||'`; \ if gcc -DHAVE_CONFIG_H -I. -I. -I. -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"//libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -I /usr/src/linux-2.6.32.3/include -I /usr/src/linux-2 then mv -f "$depbase.Tpo" "$depbase.Po"; else rm -f "$depbase.Tpo"; exit 1; fi /bin/sh ./libtool --tag=CC --mode=link gcc -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"//libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -I /usr/src/linux-2.6.32.3/include -I /usr/src/linux-2.6.32.3/ libtool: link: gcc -D_LARGEFILE_SOURCE=1 -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 -D_REENTRANT -Wall -Waggregate-return -Wmissing-declarations -Wmissing-prototypes -Wredundant-decls -Wshadow -Wstrict-prototypes -Winline -pipe -DXTABLES_LIBDIR=\"//libexec/xtables\" -DXTABLES_INTERNAL -I./include -I./include -I /usr/src/linux-2.6.32.3/include -I /usr/src/linux-2.6.32.3/include -g -O2 -o .libs/ip6tables sed -e 's/@PACKAGE_AND_VERSION@/iptables 1.4.4/g' -e '/@MATCH@/ r extensions/matches4.man' -e '/@TARGET@/ r extensions/targets4.man' iptables.8.in >iptables.8; sed -e 's/@PACKAGE_AND_VERSION@/iptables 1.4.4/g' -e '/@MATCH@/ r extensions/matches6.man' -e '/@TARGET@/ r extensions/targets6.man' ip6tables.8.in >ip6tables.8; make[2]: Leaving directory `/var/downloads/iptables-1.4.4' make[1]: Leaving directory `/var/downloads/iptables-1.4.4' |
#make install
Making install in extensions make[1]: Entering directory `/var/downloads/iptables-1.4.4/extensions' if test -n " libxt_CLASSIFY.so libxt_cluster.so libxt_comment.so libxt_connbytes.so libxt_connlimit.so libxt_connmark.so libxt_CONNMARK.so libxt_CONNSECMARK.so libxt_conntrack.so libxt_dccp.so libxt_dscp.so libxt_DSCP.so libxt_esp.so libxt_hashlimit.so libxt_helper.so libxt_IMQ.so libxt_iprange.so libxt_length.so libxt_limit.so libxt_mac.so libxt_mark.so libxt_MARK.so libxt_multiport.so libxt_NFLO make[1]: Leaving directory `/var/downloads/iptables-1.4.4/extensions' Making install in include make[1]: Entering directory `/var/downloads/iptables-1.4.4/include' make[2]: Entering directory `/var/downloads/iptables-1.4.4/include' make[2]: Цель `install-exec-am' не требует выполнения команд. test -z "//include" || mkdir -p -- "//include" test -z "//include" || mkdir -p -- "//include" ... /usr/bin/install -c -m 644 './iptables-save.8' '//share/man/man8/iptables-save.8' /usr/bin/install -c -m 644 './iptables-xml.8' '//share/man/man8/iptables-xml.8' /usr/bin/install -c -m 644 './ip6tables.8' '//share/man/man8/ip6tables.8' /usr/bin/install -c -m 644 './ip6tables-restore.8' '//share/man/man8/ip6tables-restore.8' /usr/bin/install -c -m 644 './ip6tables-save.8' '//share/man/man8/ip6tables-save.8' test -z "//lib/pkgconfig" || mkdir -p -- "//lib/pkgconfig" /usr/bin/install -c -m 644 'libiptc.pc' '//lib/pkgconfig/libiptc.pc' /usr/bin/install -c -m 644 'xtables.pc' '//lib/pkgconfig/xtables.pc' make[2]: Leaving directory `/var/downloads/iptables-1.4.4' make[1]: Leaving directory `/var/downloads/iptables-1.4.4' |
#iptables -V
iptables v1.4.4 |
#halt
|
#ipt
ipt iptables-restore iptm iptunnel iptables iptables-save iptml iptables-apply iptables-xml iptt iptables-multi iptl ipttl |
#iptl
Chain INPUT (policy ACCEPT) target prot opt source destination Chain FORWARD (policy ACCEPT) target prot opt source destination Chain OUTPUT (policy ACCEPT) target prot opt source destination |
#ipt -V
iptables v1.4.4 |
#modprobe -r sch_esfq
|
#ip -V
ip utility, iproute2-ss080725 |
#ip -V
ip utility, iproute2-ss080725 |
Время первой команды журнала | 15:08:15 2010- 2-26 | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Время последней команды журнала | 17:03:22 2010- 2-26 | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Количество командных строк в журнале | 101 | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Процент команд с ненулевым кодом завершения, % | 20.79 | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Процент синтаксически неверно набранных команд, % | 0.00 | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Суммарное время работы с терминалом *, час | 1.92 | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Количество командных строк в единицу времени, команда/мин | 0.88 | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Частота использования команд |
|
В журнал автоматически попадают все команды, данные в любом терминале системы.
Для того чтобы убедиться, что журнал на текущем терминале ведётся, и команды записываются, дайте команду w. В поле WHAT, соответствующем текущему терминалу, должна быть указана программа script.
Команды, при наборе которых были допущены синтаксические ошибки, выводятся перечёркнутым текстом:
$ l s-l bash: l: command not found |
Если код завершения команды равен нулю, команда была выполнена без ошибок. Команды, код завершения которых отличен от нуля, выделяются цветом.
$ test 5 -lt 4 |
Команды, ход выполнения которых был прерван пользователем, выделяются цветом.
$ find / -name abc find: /home/devi-orig/.gnome2: Keine Berechtigung find: /home/devi-orig/.gnome2_private: Keine Berechtigung find: /home/devi-orig/.nautilus/metafiles: Keine Berechtigung find: /home/devi-orig/.metacity: Keine Berechtigung find: /home/devi-orig/.inkscape: Keine Berechtigung ^C |
Команды, выполненные с привилегиями суперпользователя, выделяются слева красной чертой.
# id uid=0(root) gid=0(root) Gruppen=0(root) |
Изменения, внесённые в текстовый файл с помощью редактора, запоминаются и показываются в журнале в формате ed. Строки, начинающиеся символом "<", удалены, а строки, начинающиеся символом ">" -- добавлены.
$ vi ~/.bashrc
|
Для того чтобы изменить файл в соответствии с показанными в диффшоте изменениями, можно воспользоваться командой patch. Нужно скопировать изменения, запустить программу patch, указав в качестве её аргумента файл, к которому применяются изменения, и всавить скопированный текст:
$ patch ~/.bashrc |
Для того чтобы получить краткую справочную информацию о команде, нужно подвести к ней мышь. Во всплывающей подсказке появится краткое описание команды.
Если справочная информация о команде есть, команда выделяется голубым фоном, например: vi. Если справочная информация отсутствует, команда выделяется розовым фоном, например: notepad.exe. Справочная информация может отсутствовать в том случае, если (1) команда введена неверно; (2) если распознавание команды LiLaLo выполнено неверно; (3) если информация о команде неизвестна LiLaLo. Последнее возможно для редких команд.
Большие, в особенности многострочные, всплывающие подсказки лучше всего показываются браузерами KDE Konqueror, Apple Safari и Microsoft Internet Explorer. В браузерах Mozilla и Firefox они отображаются не полностью, а вместо перевода строки выводится специальный символ.
Время ввода команды, показанное в журнале, соответствует времени начала ввода командной строки, которое равно тому моменту, когда на терминале появилось приглашение интерпретатора
Имя терминала, на котором была введена команда, показано в специальном блоке. Этот блок показывается только в том случае, если терминал текущей команды отличается от терминала предыдущей.
Вывод не интересующих вас в настоящий момент элементов журнала, таких как время, имя терминала и других, можно отключить. Для этого нужно воспользоваться формой управления журналом вверху страницы.
Небольшие комментарии к командам можно вставлять прямо из командной строки. Комментарий вводится прямо в командную строку, после символов #^ или #v. Символы ^ и v показывают направление выбора команды, к которой относится комментарий: ^ - к предыдущей, v - к следующей. Например, если в командной строке было введено:
$ whoami
user
$ #^ Интересно, кто я?в журнале это будет выглядеть так:
$ whoami
user
Интересно, кто я? |
Если комментарий содержит несколько строк, его можно вставить в журнал следующим образом:
$ whoami
user
$ cat > /dev/null #^ Интересно, кто я?
Программа whoami выводит имя пользователя, под которым мы зарегистрировались в системе. - Она не может ответить на вопрос о нашем назначении в этом мире.В журнале это будет выглядеть так:
$ whoami user
|
Комментарии, не относящиеся непосредственно ни к какой из команд, добавляются точно таким же способом, только вместо симолов #^ или #v нужно использовать символы #=
1 2 3 4Группы команд, выполненных на разных терминалах, разделяются специальной линией. Под этой линией в правом углу показано имя терминала, на котором выполнялись команды. Для того чтобы посмотреть команды только одного сенса, нужно щёкнуть по этому названию.
LiLaLo (L3) расшифровывается как Live Lab Log.
Программа разработана для повышения эффективности обучения Unix/Linux-системам.
(c) Игорь Чубин, 2004-2008