tanxw998
newbie
Topic Author
Posts: 37
Joined: Mon Jun 04, 2007 7:10 pm

solve the load balancing problem of linux on irq in SMP

Wed Aug 26, 2009 6:35 am

/*
* BOTTOM_SOFTIRQ_NET
* An implementation of bottom softirq concurrent execution on SMP.
* This is implemented by splitting the current net softirq into a top half
* and a bottom half, and dispatching the bottom half to each CPU's workqueue.
* Hopefully, it can raise the throughput of the NIC when running iptables
* on an SMP machine.
*
* This is BS version 2 (BS2); it adds SMP parallelization for all
* other protocols besides IPv4, for example bridge, packet raw, etc.
*
* Version: $Id: bs_smp.c, v1.0 for kernel versions:
* 2.6.13-15 (2.6.13-15-smp): fully tested;
* the other versions need more testing.
*
* Authors: John Ye & Qianyu Ye, 2007.08.27
*/

/* The user must select one of the following version defines. No guarantee to work.
Testing LINUX_VERSION_CODE against KERNEL_VERSION(2,6,x), as done below, is better.
*/

/*
#define KERNEL_VERSION_2_6_13 //2.6.13-15 OK
#define KERNEL_VERSION_2_6_16__ //2.6.16.53 OK
#define KERNEL_VERSION_2_6_17__ //2.6.17.9 #3 OK
#define KERNEL_VERSION_2_6_18__ //2.6.18.8 & 2.6.18.2-34 OK
#define KERNEL_VERSION_2_6_19__ //2.6.19 #1 OK
#define KERNEL_VERSION_2_6_20__ //2.6.20 OK
#define KERNEL_VERSION_2_6_21__ //2.6.21.1 OK
#define KERNEL_VERSION_2_6_22__ //2.6.22.5 OK
#define KERNEL_VERSION_2_6_23__ //2.6.23-rc8 OK
*/

/*
# Makefile for kernel version 2.6.x.
ifneq ($(KERNELRELEASE),)
debug-objs := bs_smp.o
obj-m := bs_smp.o
CFLAGS += -w -Wimplicit-function-declaration
else
PWD := $(shell pwd)
KVER ?= $(shell uname -r)
KDIR := /lib/modules/$(KVER)/build
all:
	$(MAKE) -C $(KDIR) M=$(PWD)
clean:
	rm -rf .*.cmd *.o *.mod.c *.ko .tmp_versions
endif
*/
#include <linux/version.h>

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) && LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
#include <linux/config.h>
#endif

#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/ldt.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/aio.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/input.h>
#include <linux/interrupt.h>
#include <linux/ipsec.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/major.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mroute.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netlink.h>
#include <linux/pagemap.h>
#include <linux/pm.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/romfs_fs.h>
#include <linux/sched.h>
#include <linux/security.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/string.h>
#include <linux/swap.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/user.h>
#include <linux/vfs.h>
#include <linux/workqueue.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/route.h>
#include <net/snmp.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/xfrm.h>

//from dev.c
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) && LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,19)
#include <linux/divert.h>
#endif

#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
//#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>

#include <linux/ip.h> //johnye

#ifdef CONFIG_NET_RADIO
#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
#include <net/iw_handler.h>
#endif /* CONFIG_NET_RADIO */
#include <asm/current.h>


#define CONFIG_BOTTOM_SOFTIRQ_MODULE
//#undef CONFIG_NET_CLS_ACT //testing only.

#define TAPFUNC "netif_receive_skb"

static int (*p_tapped)();

static spinlock_t *p_ptype_lock;
static struct list_head *p_ptype_base; /* 16 way hashed list */
#define ptype_base p_ptype_base
static struct list_head *p_ptype_all; /* Taps */
#define ptype_all (*p_ptype_all)

static struct workqueue_struct **Pkeventd_wq; //why is this not the same as in 2.6.13?
#define keventd_wq (*Pkeventd_wq)
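/* keventd_wq is the workqueue behind schedule_work(). It is not exported to
 * modules, so it is resolved from System.map at load time (see kas_init()
 * below), like the other unexported symbols collected in the ___vars[] table. */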

//this is a little tricky: __netpoll_rx() is declared in netpoll.h but defined
//in net/core/netpoll.c, so resolve it at runtime like the other kernel symbols.
static int (*p__netpoll_rx)(struct sk_buff *skb);
#define __netpoll_rx (*p__netpoll_rx)
#include <linux/netpoll.h> //netpoll_rx() in netpoll.h calls __netpoll_rx()

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t *p_netstamp_needed; // = ATOMIC_INIT(0);
#define netstamp_needed (*p_netstamp_needed)


#ifdef CONFIG_NET_CLS_ACT
static int (*p_ing_filter)(struct sk_buff *skb);
//#define ing_filter (*p_ing_filter)
#endif

DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);

static void (*p__queue_work)(struct cpu_workqueue_struct *cwq, struct work_struct *work);
#define __queue_work (*p__queue_work)

void (*p_ip_rcv)();

static struct {
void *feed;
char *symb;
} ___vars[] = {
{ &p_ptype_lock, "ptype_lock" },
{ &p_ptype_base, "ptype_base" },
{ &p_ptype_all, "ptype_all" },
{ &Pkeventd_wq, "keventd_wq" },
{ &p__queue_work, "__queue_work" },
{ &p__netpoll_rx, "__netpoll_rx" },
{ &p_netstamp_needed, "netstamp_needed" },
#ifdef CONFIG_NET_CLS_ACT
{ &p_ing_filter, "ing_filter" },
#endif
{ 0, 0 }
};

/*
if(!(p_ptype_lock = sysmap_name2addr("ptype_lock"))) return 1;
if(!(p_ptype_base = sysmap_name2addr("ptype_base"))) return 1;
if(!(p_ptype_all = sysmap_name2addr("ptype_all"))) return 1;
if(!(Pkeventd_wq = sysmap_name2addr("keventd_wq"))) return 1;
if(!(p__queue_work = sysmap_name2addr("__queue_work"))) return 1;
if(!(p__netpoll_rx = sysmap_name2addr("__netpoll_rx"))) return 1;
if(!(p_netstamp_needed = sysmap_name2addr("netstamp_needed"))) return 1;
if(!(p_ing_filter = sysmap_name2addr("ing_filter"))) return 1;
*/

#define PATCH_START net/core/dev.c
#define CONFIG_BOTTOM_SOFTIRQ_SMP
#define CONFIG_BOTTOM_SOFTIRQ_SMP_SYSCTL

//#ifdef CONFIG_BOTTOM_SOFTIRQ_SMP

/*
[PATCH: 2.6.13-15-SMP 1/2] network: concurrently run softirq network code on SMP
Bottom Softirq Implementation. John Ye, 2007.08.27

Why this patch:
Make the kernel able to execute softirq network code concurrently on SMP systems.
Take full advantage of SMP to handle more packets and greatly raise NIC throughput.
The current kernel's net packet processing logic is:
1) The CPU which handles a hardirq must also execute the related softirq.
2) One softirq instance (handling the irqs taken by one CPU) can't be spread across
more than one CPU at a time.
These limitations make it hard for kernel networking to take advantage of SMP.

How this patch:
It splits the current softirq code into 2 parts: the cpu-sensitive top half,
and the cpu-insensitive bottom half, then makes the bottom half (called BS)
execute concurrently on SMP.
The two parts are not equal in terms of size and load. The top part has constant code
size (mainly in net/core/dev.c and the NIC drivers), while the bottom part involves
netfilter (iptables), whose load varies very much. An iptables setup with 1000 rules to match
will make the bottom part's load very high. So, if the bottom-part softirq
can be distributed to the processors and run concurrently on them, the network will
gain much more packet handling capacity and throughput will increase
remarkably.

Where useful:
It's useful on SMP machines that meet the following 2 conditions:
1) high kernel network load, for example running iptables with thousands of rules, etc.
2) more CPUs than active NICs, e.g. a 4-CPU machine with 2 NICs.
On these systems, as the softirq load increases, some CPUs will be idle
while others (as many as there are NICs) stay busy.
IRQBALANCE will help, but it only shifts IRQs among CPUs and gives no softirq concurrency.
Balancing the load of each CPU alone will not remarkably increase network speed.

Where NOT useful:
If the bottom half of the softirq is too small (no iptables running), or the network
is too idle, the BS patch will show no visible effect. But it has no
negative effect either.
The user can turn off the BS functionality by setting /proc/sys/net/bs_policy to 0.

How to test:
On a Linux box, run iptables and add 2000 rules to table filter & table nat to simulate a huge
softirq load. Then open 20 ftp sessions downloading a big file. On another machine (which
uses this test machine as its gateway), open 20 more ftp download sessions. Compare the speed
without BS enabled and with BS enabled.
cat /proc/sys/net/bs_policy: 1 for flow dispatch, 2 for random dispatch, 0 for no dispatch.
cat /proc/sys/net/bs_status: shows the usage of each CPU.
Tests show that when the bottom softirq load is high, the network throughput can be nearly
doubled on a 2-CPU machine. Hopefully it may be quadrupled on a 4-CPU Linux box.

Bugs:
It will NOT allow CPU hotplug.
It only allows incremental CPU ids, starting from 0 up to num_online_cpus().
For example, 0,1,2,3 is OK; 0,1,8,9 is not.

Some considerations for the future:
1) With the BS patch, the irq balance code in arch/i386/kernel/io_apic.c seems unnecessary,
at least for network irqs.
2) The softirq load will become very small. It only runs the top half of the old softirq, which
is much less expensive than the bottom half---the netfilter processing.
To let the top softirq process more packets, can these 3 network parameters be given larger values?
extern int netdev_max_backlog = 1000;
extern int netdev_budget = 300;
extern int weight_p = 64;
3) For now, BS runs on the built-in keventd threads; could dedicated workqueues be created for it to run on?

Signed-off-by: John Ye (Seeker) <johny@webizmail.com>
*/
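/*
 * Illustration only, added for clarity (not referenced by the module): a
 * minimal sketch of the "flow dispatch" policy described above. All packets
 * of one TCP/UDP flow hash to the same CPU, so per-flow ordering is kept
 * while the netfilter-heavy bottom half is spread over all online CPUs.
 * bs_dispatch() below performs the same computation inline.
 */
static inline unsigned int bs_flow_cpu_example(__u32 saddr, __u32 daddr,
                                               __u16 sport, __u16 dport,
                                               unsigned int ncpus)
{
	return (saddr + daddr + sport + dport) % ncpus;
}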

#define CBPTR( skb ) (*((void **)(skb->cb)))
#define BS_USE_PERCPU_DATA
struct cpu_stat
{
unsigned long irqs; //total irqs
unsigned long dids; //packets this cpu processed itself
unsigned long works;
};
#define BS_CPU_STAT_DEFINED

static int nr_cpus = 0;

#define BS_POL_LINK 1
#define BS_POL_RANDOM 2
int bs_policy = BS_POL_LINK;

static DEFINE_PER_CPU(struct sk_buff_head, bs_cpu_queues);
static DEFINE_PER_CPU(struct work_struct, bs_works);
//static DEFINE_PER_CPU(struct cpu_stat, bs_cpu_status);
struct cpu_stat bs_cpu_status[NR_CPUS];

//static int __netif_recv_skb(struct sk_buff *skb, struct net_device *odev);
static int __netif_recv_skb(struct sk_buff *skb);

static void bs_func(void *data)
{
int num, cpu;
struct sk_buff *skb;
struct work_struct *bs_works;
struct sk_buff_head *q;
cpu = smp_processor_id();

bs_works = &per_cpu(bs_works, cpu);
q = &per_cpu(bs_cpu_queues, cpu);

restart:
num = 0;
while(1)
{
spin_lock(&q->lock);
if(!(skb = __skb_dequeue(q))) {
spin_unlock(&q->lock);
break;
}
spin_unlock(&q->lock);
num++;

local_bh_disable();
__netif_recv_skb(skb);
local_bh_enable(); // sub_preempt_count(SOFTIRQ_OFFSET - 1);
}

bs_cpu_status[cpu].dids += num;
if(num > 8) printk("%d on cpu %d\n", num, cpu);
if(num > 0) goto restart;

bs_works->func = 0;

return;
}


#undef PATCH_START

#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,13)
/* COPY_IN_START_FROM kernel/workqueue.c */
struct cpu_workqueue_struct
{
spinlock_t lock;

long remove_sequence; /* Least-recently added (next to run) */
long insert_sequence; /* Next to add */

struct list_head worklist;
wait_queue_head_t more_work;
wait_queue_head_t work_done;

struct workqueue_struct *wq;
struct task_struct *thread; //task_t if 2.6.13

int run_depth; /* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;

struct workqueue_struct
{
struct cpu_workqueue_struct cpu_wq[NR_CPUS];
const char *name;
struct list_head list; /* Empty if single thread */
};
//extern struct workqueue_struct *keventd_wq;
#endif


#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,19)
struct cpu_workqueue_struct {

spinlock_t lock;

long remove_sequence; /* Least-recently added (next to run) */
long insert_sequence; /* Next to add */

struct list_head worklist;
wait_queue_head_t more_work;
wait_queue_head_t work_done;

struct workqueue_struct *wq;
struct task_struct *thread;

int run_depth; /* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;

/*
* The externally visible workqueue abstraction is an array of
* per-CPU workqueues:
*/
struct workqueue_struct {
struct cpu_workqueue_struct *cpu_wq;
const char *name;
struct list_head list; /* Empty if single thread */
};

#endif

#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,20) || LINUX_VERSION_CODE == KERNEL_VERSION(2,6,21)
struct cpu_workqueue_struct {

spinlock_t lock;

long remove_sequence; /* Least-recently added (next to run) */
long insert_sequence; /* Next to add */

struct list_head worklist;
wait_queue_head_t more_work;
wait_queue_head_t work_done;

struct workqueue_struct *wq;
struct task_struct *thread;

int run_depth; /* Detect run_workqueue() recursion depth */

int freezeable; /* Freeze the thread during suspend */
} ____cacheline_aligned;

/*
* The externally visible workqueue abstraction is an array of
* per-CPU workqueues:
*/
struct workqueue_struct {
struct cpu_workqueue_struct *cpu_wq;
const char *name;
struct list_head list; /* Empty if single thread */
};
#endif

#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,22) || LINUX_VERSION_CODE == KERNEL_VERSION(2,6,23)
struct cpu_workqueue_struct {

spinlock_t lock;

struct list_head worklist;
wait_queue_head_t more_work;
struct work_struct *current_work;

struct workqueue_struct *wq;
struct task_struct *thread;

int run_depth; /* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;

struct workqueue_struct {
struct cpu_workqueue_struct *cpu_wq;
struct list_head list;
const char *name;
int singlethread;
int freezeable; /* Freeze threads during suspend */
};

#endif


#define PATCH_START

#ifndef CONFIG_BOTTOM_SOFTIRQ_MODULE
extern void __queue_work(struct cpu_workqueue_struct *cwq, struct work_struct *work);
extern struct workqueue_struct *keventd_wq;
#endif
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>

static inline int bs_dispatch(struct sk_buff *skb)
{

#ifdef CONFIG_BOTTOM_SOFTIRQ_SMP

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
struct iphdr *iph = ip_hdr(skb);
#else
struct iphdr *iph = skb->nh.iph;
#endif
if(!nr_cpus)
nr_cpus = num_online_cpus();

/*
struct tcphdr {
__u16 source;
__u16 dest;
__u32 seq;
};
*/
if(bs_policy && nr_cpus > 1) { // && iph->protocol != IPPROTO_ICMP) {
//if(bs_policy && nr_cpus > 1 && iph->protocol == IPPROTO_ICMP) { //test on icmp first
unsigned int cur, cpu;
struct work_struct *bs_works;
struct sk_buff_head *q;

cpu = cur = smp_processor_id();

bs_cpu_status[cur].irqs++;

//flow-based dispatch keeps a flow on one CPU, so no packet reordering (good point from Jamal, thanks)
if(bs_policy == BS_POL_LINK) {
int seed = 0;
if(iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP) {
struct tcphdr *th = (struct tcphdr*)(iph + 1); //udp is same as tcp
seed = ntohs(th->source) + ntohs(th->dest);
}
cpu = (iph->saddr + iph->daddr + seed) % nr_cpus;

/*
if(net_ratelimit() && iph->protocol == IPPROTO_TCP) {
struct tcphdr *th = iph + 1;

printk("seed %u (%u %u) cpu %d. source %d dest %d\n",
seed, iph->saddr + iph->daddr, iph->saddr + iph->daddr + seed, cpu,
ntohs(th->source), ntohs(th->dest));
}
*/
} else
//random distribute
if(bs_policy == BS_POL_RANDOM)
cpu = (bs_cpu_status[cur].irqs % nr_cpus);

//cpu = cur;
//cpu = (cur? 0: 1);

if(cpu == cur) {
bs_cpu_status[cpu].dids++;
return __netif_recv_skb(skb);
}

q = &per_cpu(bs_cpu_queues, cpu);

if(!q->next) {
skb_queue_head_init(q);
}

spin_lock(&q->lock);
__skb_queue_tail(q, skb);
spin_unlock(&q->lock);

bs_works = &per_cpu(bs_works, cpu);
if (!bs_works->func) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,19)
INIT_WORK(bs_works, bs_func, 0);
#else
INIT_WORK(bs_works, bs_func);
#endif
bs_cpu_status[cpu].works++;
preempt_disable();
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
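/* __queue_work() expects WORK_STRUCT_PENDING to already be set (queue_work()
 * normally does this); set it by hand here since we target a specific CPU's cwq. */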
set_bit(WORK_STRUCT_PENDING, work_data_bits(bs_works));
#endif
__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), bs_works);
preempt_enable();
#else
INIT_WORK(bs_works, bs_func, q);
bs_cpu_status[cpu].works++;
preempt_disable();
__queue_work(keventd_wq->cpu_wq + cpu, bs_works);
preempt_enable();
#endif

}

} else {

bs_cpu_status[smp_processor_id()].dids++;
return __netif_recv_skb(skb);
}
return 0;
#else
return __netif_recv_skb(skb);
#endif
}
#undef PATCH_START



#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,13)

#ifdef CONFIG_NET_CLS_ACT
static int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;

if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk("Redir loop detected Dropping packet (%s->%s)\n",
skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
return TC_ACT_SHOT;
}

skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
if (NULL == skb->input_dev) {
skb->input_dev = skb->dev;
printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name);
}
spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->ingress_lock);

}

return result;
}
#endif
static inline void net_timestamp(struct timeval *stamp)
{
if (atomic_read(&netstamp_needed))
do_gettimeofday(stamp);
else {
stamp->tv_sec = 0;
stamp->tv_usec = 0;
}
}

static __inline__ int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev)
{
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev);
}
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

static __inline__ int handle_bridge(struct sk_buff **pskb,
struct packet_type **pt_prev, int *ret)
{
struct net_bridge_port *port;

if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
return 0;

if (*pt_prev) {
*ret = deliver_skb(*pskb, *pt_prev);
*pt_prev = NULL;
}

return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret) (0)
#endif

static __inline__ void skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;

if (dev->master) {
skb->real_dev = skb->dev;
skb->dev = dev->master;
}
}

int REP_netif_receive_skb(struct sk_buff *skb)
{
//struct packet_type *ptype, *pt_prev;
//int ret = NET_RX_DROP;
//unsigned short type;

/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;

if (!skb->stamp.tv_sec)
net_timestamp(&skb->stamp);

skb_bond(skb);

//__get_cpu_var(netdev_rx_stat).total++;

skb->h.raw = skb->nh.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->mac.raw;

return bs_dispatch(skb);
}


int __netif_recv_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
int ret = NET_RX_DROP;
unsigned short type;

pt_prev = NULL;

rcu_read_lock();

if(CBPTR(skb))
printk("+\n");

#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif

//packet tap
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev);
}
pt_prev = ptype;
}
}

#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
ret = deliver_skb(skb, pt_prev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}

ret = ing_filter(skb);

if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}

skb->tc_verd = 0;
ncls:
#endif

handle_diverter(skb);

if (handle_bridge(&skb, &pt_prev, &ret))
goto out;

type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev); //increase skb->users
}
pt_prev = ptype;
}
}

if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}

out:
rcu_read_unlock();

return ret;
}
#endif /* 2.6.13 */


#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,16)

#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
* when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
* a compare and 2 stores extra right now if we dont have it on
* but have CONFIG_NET_CLS_ACT
* NOTE: This doesnt stop any functionality; if you dont have
* the ingress scheduler, you just cant add policies on ingress.
*
*/
static int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;

if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk("Redir loop detected Dropping packet (%d->%d)\n",
skb->iif, skb->dev->ifindex);
return TC_ACT_SHOT;
}

skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

spin_lock(&dev->queue_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->queue_lock);

}

return result;
}
#endif
static __inline__ int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
{
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
__net_timestamp(skb);
else {
skb->tstamp.off_sec = 0;
skb->tstamp.off_usec = 0;
}
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

static __inline__ int handle_bridge(struct sk_buff **pskb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
struct net_bridge_port *port;

if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
return 0;

if (*pt_prev) {
*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
*pt_prev = NULL;
}

return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
#endif
static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;

if (dev->master)
skb->dev = dev->master;

return dev;
}
int REP_netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
unsigned short type;

/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;

if (!skb->tstamp.off_sec)
net_timestamp(skb);

if (!skb->iif)
skb->iif = skb->dev->ifindex;

orig_dev = skb_bond(skb);

//__get_cpu_var(netdev_rx_stat).total++;

skb->h.raw = skb->nh.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->mac.raw;

CBPTR(skb) = orig_dev;
return bs_dispatch(skb);
}

int __netif_recv_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
unsigned short type;

orig_dev = CBPTR(skb);
CBPTR(skb) = 0;

pt_prev = NULL;

rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif

list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}

ret = ing_filter(skb);

if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}

skb->tc_verd = 0;
ncls:
#endif

handle_diverter(skb);

if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;

type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}

out:
rcu_read_unlock();
return ret;
}
#endif


#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,17)

#ifdef CONFIG_NET_CLS_ACT
static int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;

if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk("Redir loop detected Dropping packet (%s->%s)\n",
skb->input_dev->name, skb->dev->name);
return TC_ACT_SHOT;
}

skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->ingress_lock);

}

return result;
}
#endif

static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
__net_timestamp(skb);
else {
skb->tstamp.off_sec = 0;
skb->tstamp.off_usec = 0;
}
}
static __inline__ int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
{
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

static __inline__ int handle_bridge(struct sk_buff **pskb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
struct net_bridge_port *port;

if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
return 0;

if (*pt_prev) {
*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
*pt_prev = NULL;
}

return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
#endif
static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;

if (dev->master) {
/*
* On bonding slaves other than the currently active
* slave, suppress duplicates except for 802.3ad
* ETH_P_SLOW and alb non-mcast/bcast.
*/
if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
if (dev->master->priv_flags & IFF_MASTER_ALB) {
if (skb->pkt_type != PACKET_BROADCAST &&
skb->pkt_type != PACKET_MULTICAST)
goto keep;
}

if (dev->master->priv_flags & IFF_MASTER_8023AD &&
skb->protocol == __constant_htons(ETH_P_SLOW))
goto keep;

kfree_skb(skb);
return NULL;
}
keep:
skb->dev = dev->master;
}

return dev;
}


int REP_netif_receive_skb(struct sk_buff *skb)
{
//struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
//int ret = NET_RX_DROP;
//unsigned short type;

/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;

if (!skb->tstamp.off_sec)
net_timestamp(skb);

if (!skb->input_dev)
skb->input_dev = skb->dev;

orig_dev = skb_bond(skb);

if (!orig_dev)
return NET_RX_DROP;

//__get_cpu_var(netdev_rx_stat).total++;

skb->h.raw = skb->nh.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->mac.raw;

CBPTR(skb) = orig_dev;
return bs_dispatch(skb);
}

int __netif_recv_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
unsigned short type;

orig_dev = CBPTR(skb);
CBPTR(skb) = 0;
pt_prev = NULL;

rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif

list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}

ret = ing_filter(skb);

if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}

skb->tc_verd = 0;
ncls:
#endif

handle_diverter(skb);

if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;

type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}

out:
rcu_read_unlock();
return ret;
}

#endif /* 2.6.17 */

#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,18)

#ifdef CONFIG_NET_CLS_ACT
static int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;

if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n",
skb->input_dev->name, skb->dev->name);
return TC_ACT_SHOT;
}

skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->ingress_lock);

}

return result;
}
#endif
static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
__net_timestamp(skb);
else {
skb->tstamp.off_sec = 0;
skb->tstamp.off_usec = 0;
}
}
static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;

if (dev->master) {
if (skb_bond_should_drop(skb)) {
kfree_skb(skb);
return NULL;
}
skb->dev = dev->master;
}

return dev;
}
static __inline__ int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
{
atomic_inc(&skb->users);
if(pt_prev->func == p_ip_rcv) {
printk(".");
}
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

static __inline__ int handle_bridge(struct sk_buff **pskb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
struct net_bridge_port *port;

if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
return 0;

if (*pt_prev) {
*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
*pt_prev = NULL;
}

return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
#endif


int REP_netif_receive_skb(struct sk_buff *skb)
{
//struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
//int ret = NET_RX_DROP;
//unsigned short type;

/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;

if (!skb->tstamp.off_sec)
net_timestamp(skb);

if (!skb->input_dev)
skb->input_dev = skb->dev;

orig_dev = skb_bond(skb);

if (!orig_dev)
return NET_RX_DROP;

//__get_cpu_var(netdev_rx_stat).total++;

skb->h.raw = skb->nh.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->mac.raw;

CBPTR(skb) = orig_dev;
return bs_dispatch(skb);
}

int __netif_recv_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
unsigned short type;

orig_dev = CBPTR(skb);
CBPTR(skb) = 0;

//printk("+");
pt_prev = NULL;

rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif

list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}

ret = ing_filter(skb);

if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}

skb->tc_verd = 0;
ncls:
#endif

handle_diverter(skb);

if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;

type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
if(pt_prev->func != p_ip_rcv) {
//type, ipv4, arp, llc, etc
//printk("type %d %p \n", type, pt_prev->func);

}
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}

out:
rcu_read_unlock();
return ret;
}

#endif /* 2.6.18 */

#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,19)

#ifdef CONFIG_NET_CLS_ACT
static int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;

if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n",
skb->input_dev->name, skb->dev->name);
return TC_ACT_SHOT;
}

skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->ingress_lock);

}

return result;
}
#endif

static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
__net_timestamp(skb);
else {
skb->tstamp.off_sec = 0;
skb->tstamp.off_usec = 0;
}
}
static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;

if (dev->master) {
if (skb_bond_should_drop(skb)) {
kfree_skb(skb);
return NULL;
}
skb->dev = dev->master;
}

return dev;
}

static __inline__ int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
{
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

static __inline__ int handle_bridge(struct sk_buff **pskb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
struct net_bridge_port *port;

if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
return 0;

if (*pt_prev) {
*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
*pt_prev = NULL;
}

return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
#endif

int REP_netif_receive_skb(struct sk_buff *skb)
{
//struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
//int ret = NET_RX_DROP;
//unsigned short type;

/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;

if (!skb->tstamp.off_sec)
net_timestamp(skb);

if (!skb->input_dev)
skb->input_dev = skb->dev;

orig_dev = skb_bond(skb);

if (!orig_dev)
return NET_RX_DROP;

//__get_cpu_var(netdev_rx_stat).total++;

skb->h.raw = skb->nh.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->mac.raw;


CBPTR(skb) = orig_dev;
return bs_dispatch(skb);
}

int __netif_recv_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
unsigned short type;

orig_dev = CBPTR(skb);
CBPTR(skb) = 0;

pt_prev = NULL;

rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif

list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}

ret = ing_filter(skb);

if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}

skb->tc_verd = 0;
ncls:
#endif

handle_diverter(skb);

if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;

type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}

out:
rcu_read_unlock();
return ret;
}

#endif /* 2.6.19 */

#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,20)
#ifdef CONFIG_NET_CLS_ACT
static int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;

if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n",
skb->input_dev->name, skb->dev->name);
return TC_ACT_SHOT;
}

skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->ingress_lock);

}

return result;
}
#endif
static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
__net_timestamp(skb);
else {
skb->tstamp.off_sec = 0;
skb->tstamp.off_usec = 0;
}
}
static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;

if (dev->master) {
if (skb_bond_should_drop(skb)) {
kfree_skb(skb);
return NULL;
}
skb->dev = dev->master;
}

return dev;
}
static __inline__ int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
{
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

static __inline__ int handle_bridge(struct sk_buff **pskb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
struct net_bridge_port *port;

if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
return 0;

if (*pt_prev) {
*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
*pt_prev = NULL;
}

return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
#endif

int REP_netif_receive_skb(struct sk_buff *skb)
{
//struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
//int ret = NET_RX_DROP;
//__be16 type;

/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;

if (!skb->tstamp.off_sec)
net_timestamp(skb);

if (!skb->input_dev)
skb->input_dev = skb->dev;

orig_dev = skb_bond(skb);

if (!orig_dev)
return NET_RX_DROP;

//__get_cpu_var(netdev_rx_stat).total++;

skb->h.raw = skb->nh.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->mac.raw;

CBPTR(skb) = orig_dev;
return bs_dispatch(skb);
}

int __netif_recv_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
__be16 type;

orig_dev = CBPTR(skb);
CBPTR(skb) = 0;

pt_prev = NULL;

rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif

list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}

ret = ing_filter(skb);

if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}

skb->tc_verd = 0;
ncls:
#endif

if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;

type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}

out:
rcu_read_unlock();
return ret;
}

#endif /* 2.6.20 */

#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,21)

#ifdef CONFIG_NET_CLS_ACT
static int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;

if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
skb->iif, skb->dev->ifindex);
return TC_ACT_SHOT;
}

skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

spin_lock(&dev->queue_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->queue_lock);

}

return result;
}
#endif

static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
__net_timestamp(skb);
else {
skb->tstamp.off_sec = 0;
skb->tstamp.off_usec = 0;
}
}
static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;

if (dev->master) {
if (skb_bond_should_drop(skb)) {
kfree_skb(skb);
return NULL;
}
skb->dev = dev->master;
}

return dev;
}
static __inline__ int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
{
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

static __inline__ int handle_bridge(struct sk_buff **pskb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
struct net_bridge_port *port;

if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
return 0;

if (*pt_prev) {
*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
*pt_prev = NULL;
}

return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
#endif

int REP_netif_receive_skb(struct sk_buff *skb)
{
//struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
//int ret = NET_RX_DROP;
//__be16 type;

/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;

if (!skb->tstamp.off_sec)
net_timestamp(skb);

if (!skb->iif)
skb->iif = skb->dev->ifindex;

orig_dev = skb_bond(skb);

if (!orig_dev)
return NET_RX_DROP;

//__get_cpu_var(netdev_rx_stat).total++;

skb->h.raw = skb->nh.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->mac.raw;

CBPTR(skb) = orig_dev;
return bs_dispatch(skb);
}

int __netif_recv_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
__be16 type;

orig_dev = CBPTR(skb);
CBPTR(skb) = 0;

pt_prev = NULL;

rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif

list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}

ret = ing_filter(skb);

if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}

skb->tc_verd = 0;
ncls:
#endif

if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;

type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}

out:
rcu_read_unlock();
return ret;
}

#endif /* 2.6.21 */

#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,22)
static int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;

if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
skb->iif, skb->dev->ifindex);
return TC_ACT_SHOT;
}

skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->ingress_lock);

}

return result;
}

static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
__net_timestamp(skb);
else
skb->tstamp.tv64 = 0;
}
static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;

if (dev->master) {
if (skb_bond_should_drop(skb)) {
kfree_skb(skb);
return NULL;
}
skb->dev = dev->master;
}

return dev;
}

static inline int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
{
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
* If bridge module is loaded call bridging hook.
* returns NULL if packet was consumed.
*/
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
struct sk_buff *skb) __read_mostly;
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
struct net_bridge_port *port;

if (skb->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference(skb->dev->br_port)) == NULL)
return skb;

if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}

return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
#endif

int REP_netif_receive_skb(struct sk_buff *skb)
{
//struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
//int ret = NET_RX_DROP;
//__be16 type;

/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;

if (!skb->tstamp.tv64)
net_timestamp(skb);

if (!skb->iif)
skb->iif = skb->dev->ifindex;

orig_dev = skb_bond(skb);

if (!orig_dev)
return NET_RX_DROP;

//__get_cpu_var(netdev_rx_stat).total++;

skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;


CBPTR(skb) = orig_dev;
return bs_dispatch(skb);
}

int __netif_recv_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
__be16 type;

orig_dev = CBPTR(skb);
CBPTR(skb) = 0;
pt_prev = NULL;

rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif

list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}

ret = ing_filter(skb);

if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}

skb->tc_verd = 0;
ncls:
#endif

skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
if (!skb)
goto out;

type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}

out:
rcu_read_unlock();
return ret;
}
#endif /* 2.6.22 */

#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,23)
#ifdef CONFIG_NET_CLS_ACT
static int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;

if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
skb->iif, skb->dev->ifindex);
return TC_ACT_SHOT;
}

skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->ingress_lock);

}

return result;
}
#endif

#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret,
struct net_device *orig_dev)
{
if (skb->dev->macvlan_port == NULL)
return skb;

if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
#endif

static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
__net_timestamp(skb);
else
skb->tstamp.tv64 = 0;
}
static inline struct net_device *skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;

if (dev->master) {
if (skb_bond_should_drop(skb)) {
kfree_skb(skb);
return NULL;
}
skb->dev = dev->master;
}

return dev;
}

static inline int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
{
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
* If bridge module is loaded call bridging hook.
* returns NULL if packet was consumed.
*/
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
struct sk_buff *skb) __read_mostly;
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
struct net_bridge_port *port;

if (skb->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference(skb->dev->br_port)) == NULL)
return skb;

if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}

return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
#endif

int REP_netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
__be16 type;

/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;

if (!skb->tstamp.tv64)
net_timestamp(skb);

if (!skb->iif)
skb->iif = skb->dev->ifindex;

orig_dev = skb_bond(skb);

if (!orig_dev)
return NET_RX_DROP;

//__get_cpu_var(netdev_rx_stat).total++;

skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;
#define PATCH_START net/core/dev.c
CBPTR(skb) = orig_dev;
return bs_dispatch(skb);
}

int __netif_recv_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
__be16 type;

orig_dev = CBPTR(skb);
CBPTR(skb) = 0;

#undef PATCH_START
pt_prev = NULL;

rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif

list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}

ret = ing_filter(skb);

if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}

skb->tc_verd = 0;
ncls:
#endif

skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
if (!skb)
goto out;
skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
if (!skb)
goto out;

type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}

if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}

out:
rcu_read_unlock();
return ret;
}
#endif /* 2.6.23 */


//--------------------------------------------------------------------------------------
/*
* For a standard patch, these lines should be moved into net/sysctl_net.c.
*/

/* COPY_OUT_START_TO net/sysctl_net.c */
#define PATCH_START net/sysctl_net.c
#ifdef CONFIG_BOTTOM_SOFTIRQ_SMP_SYSCTL
#if !defined(BS_CPU_STAT_DEFINED)
struct cpu_stat
{
unsigned long irqs; /* total irqs on me */
unsigned long dids; /* packets this cpu processed itself */
unsigned long works; /* q works */
};
#endif
extern struct cpu_stat bs_cpu_status[NR_CPUS];
extern int bs_policy;
#undef PATCH_START
/* COPY_OUT_END_TO net/sysctl_net.c */

static ctl_table bs_ctl_table[] =
{
#define PATCH_START net/sysctl_net.c
/* COPY_OUT_START_TO net/sysctl_net.c */
{
.ctl_name = 99,
.procname = "bs_status",
.data = &bs_cpu_status,
.maxlen = sizeof(bs_cpu_status),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = 99,
.procname = "bs_policy",
.data = &bs_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#undef PATCH_START
/* COPY_OUT_END_TO net/sysctl_net.c */

{ 0, },
};

static ctl_table bs_sysctl_root[] =
{
{
.ctl_name = CTL_NET,
.procname = "net",
.mode = 0555,
.child = bs_ctl_table,
},
{ 0, },
};

struct ctl_table_header *bs_sysctl_hdr;
int register_bs_sysctl(void)
{

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
bs_sysctl_hdr = register_sysctl_table(bs_sysctl_root);
#else
bs_sysctl_hdr = register_sysctl_table(bs_sysctl_root, 0);
#endif
return 0;
}


void unregister_bs_sysctl(void)
{
unregister_sysctl_table(bs_sysctl_hdr);
}
#endif //CONFIG_BOTTOM_SOFTIRQ_SMP_SYSCTL

void seeker_init(void)
{
if(nr_cpus == 0)
nr_cpus = num_online_cpus();
register_bs_sysctl();
}


void seeker_exit(void)
{
unsigned long now;
unregister_bs_sysctl();
bs_policy = 0;
msleep(1000);
flush_scheduled_work();
now = jiffies;
msleep(1000);
printk("%u exited.\n", jiffies - now);
}
//-------------------------------------------------------------------------

/*--------------------------------------------------------------------------
*/

#define OE_KEEP_SIZE 5
static char saved[8];
char *dorepl(char *func_ptr, char *func_new, int on_off)
{
char jmp_entry[8];
int jmpoffset;
int i;
char *cp;

//printk("tapping: old %p new %p onoff %d replace %d\n", func_ptr, func_new, on_off, replace);

if(on_off == 0) {
if(!saved[0]) return 0;
lock_kernel();
memcpy(func_ptr, saved, OE_KEEP_SIZE);
unlock_kernel();
//code_dump(func_ptr, 9);
saved[0] = 0;
return saved;
}

if(1) {
if(!func_new) return 0;
memcpy(saved, func_ptr, OE_KEEP_SIZE);
printk("replace: old %p new %p onoff %d\n", func_ptr, func_new, on_off);

//do function replacing
jmp_entry[0] = '\xe9';
*(long*)&jmp_entry[1] = (long)(func_new - func_ptr - 5);

lock_kernel();
memcpy((char*)func_ptr, jmp_entry, 5);
unlock_kernel();
printk("function (%p) is replaced.\n", func_ptr);
return func_ptr;
}
}
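/* A note on the patching above (assuming i386, 32-bit addresses): 0xE9 is the
 * opcode of a near relative jump, and the following 4 bytes hold
 * (func_new - func_ptr - 5), so any call into the original netif_receive_skb()
 * immediately jumps to REP_netif_receive_skb(). The first OE_KEEP_SIZE bytes
 * of the original function are kept in saved[] so the patch can be undone at
 * module unload. */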

static char system_map[128] = "/boot/System.map-";
static long sysmap_size;
static char *sysmap_buf;

unsigned long sysmap_name2addr(char *name)
{
char *cp, *dp;
unsigned long addr;
int len, n;

if(!sysmap_buf) return 0;
if(!name || !name[0]) return 0;
n = strlen(name);
for(cp = sysmap_buf; ;)
{
cp = strstr(cp, name);
if(!cp) {
printk("%s not found.\n", name);
return 0;
}

for(dp = cp; *dp && *dp != '\n' && *dp != ' ' && *dp != '\t'; dp++);

len = dp - cp;
if(len < n) goto cont;
if(cp > sysmap_buf && cp[-1] != ' ' && cp[-1] != '\t')
{
goto cont;
}
if(len > n)
{
goto cont;
}
break;
cont:
if(*dp == 0) break;
cp += (len+1);
}

cp -= 11;
if(cp > sysmap_buf && cp[-1] != '\n')
{
printk("_ERROR_ in name2addr cp = %p base %p\n", cp, sysmap_buf);
return 0;
}
sscanf(cp, "%lx", &addr);
printk("VAR: %s %lx\n", name, addr);
return addr;
}
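/* sysmap_name2addr() above scans /boot/System.map-`uname -r`, whose lines look
 * like this (addresses are examples and differ per build):
 *
 *   c04bd6f0 T netif_receive_skb
 *   c012fa40 t __queue_work
 *
 * "cp -= 11" backs up over the address and type columns of a 32-bit map
 * (8 hex digits + space + type letter + space) so sscanf() can read the address. */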


static int kas_init()
{
struct file *fp;
int i, val;
long addr;
struct kstat st;
mm_segment_t old_fs;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,21)
//#include <linux/uts.h>
//#include <linux/utsname.h>
struct new_utsname {
char sysname[65];
char nodename[65];
char release[65];
char version[65];
char machine[65];
char domainname[65];
};
struct uts_namespace {
struct kref kref;
struct new_utsname name;
};
extern struct uts_namespace init_uts_ns;
#endif

strcat(system_map, init_uts_ns.name.release);
#else
strcat(system_map, system_utsname.release);
#endif
printk("uname -a %s\n", system_map);


old_fs = get_fs();
set_fs(get_ds()); /* system_map is passed as a __user pointer to vfs_stat() */
i = vfs_stat(system_map, &st);
set_fs(old_fs);
if(i) return 1;

sysmap_size = st.size + 32;
fp = filp_open(system_map, O_RDONLY, FMODE_READ);
if(!fp) return 2;

sysmap_buf = vmalloc(sysmap_size);
if(!sysmap_buf)
{
filp_close(fp, 0);
return 3;
}
i = kernel_read(fp, 0, sysmap_buf, sysmap_size);
if(i <= 1024)
{
filp_close(fp, 0);
vfree(sysmap_buf);
sysmap_buf = 0;
return 4;
}
sysmap_size = i;
*(int*)&sysmap_buf = 0;
filp_close(fp, 0);

if(!(p_ptype_lock = sysmap_name2addr("ptype_lock"))) return 1;
if(!(p_ptype_base = sysmap_name2addr("ptype_base"))) return 1;
if(!(p_ptype_all = sysmap_name2addr("ptype_all"))) return 1;
if(!(Pkeventd_wq = sysmap_name2addr("keventd_wq"))) return 1;
if(!(p__queue_work = sysmap_name2addr("__queue_work"))) return 1;
if(!(p__netpoll_rx = sysmap_name2addr("__netpoll_rx"))) return 1;
if(!(p_netstamp_needed = sysmap_name2addr("netstamp_needed"))) return 1;

#ifdef CONFIG_NET_CLS_ACT
if(!(p_ing_filter = sysmap_name2addr("ing_filter"))) ; // return 1;
#endif
if(!(p_tapped = sysmap_name2addr(TAPFUNC))) return 1;
if(!(p_ip_rcv = sysmap_name2addr("ip_rcv"))) return 1;
vfree(sysmap_buf);

return 0;

}

/*--------------------------------------------------------------------------
*/
static int __init init()
{
struct packet_type *pt;
int r;
if((r = kas_init())) {
printk("can't resolve globals. err %d\n", r);
return -1;
}

//printk("REP_netif_receive_skb %p\n", REP_netif_receive_skb);

if(!dorepl(p_tapped, REP_netif_receive_skb, 1))
return -1;

seeker_init();
printk("bs_smp loaded.\n");
return 0;
}


static void __exit exit(void)
{
seeker_exit();
dorepl(p_tapped, REP_netif_receive_skb, 0);
printk("KERNEL VERSION = %d %p\n", KERNEL_VERSION(2,6,23), KERNEL_VERSION(2,6,23));
}


module_init(init)
module_exit(exit)
MODULE_LICENSE("GPL");
 
Chupaka
Forum Guru
Posts: 8712
Joined: Mon Jun 19, 2006 11:15 pm
Location: Minsk, Belarus
Contact:

Re: solve the load balancing problem of linux on irq in SMP

Wed Aug 26, 2009 12:04 pm

Jamal, now you will not able to escape explaining
me how you were going to use this.
:D :D :D