 /****************************************************************************
 *
 * Copyright (c) 2016 Broadcom Ltd.
 *
 * Unless you and Broadcom execute a separate written software license
 * agreement governing use of this software, this software is licensed to
 * you under the terms of the GNU General Public License version 2 (the
 * "GPL"), available at [http://www.broadcom.com/licenses/GPLv2.php], with
 * the following added to such license:
 *
 * As a special exception, the copyright holders of this software give you
 * permission to link this software with independent modules, and to copy
 * and distribute the resulting executable under terms of your choice,
 * provided that you also meet, for each linked independent module, the
 * terms and conditions of the license of that module. An independent
 * module is a module which is not derived from this software. The special
 * exception does not apply to any modifications of the software.
 *
 * Notwithstanding the above, under no circumstances may you combine this
 * software in any way with any other Broadcom software provided under a
 * license other than the GPL, without Broadcom's express prior written
 * consent.
 *
 ****************************************************************************
 * Authors:Ignatius Cheng <ignatius.cheng@broadcom.com>
 *
 * February, 2016
 *
 ****************************************************************************/
#include <linux/module.h>
#include <linux/netfilter/x_tables.h>
#include <linux/skbuff.h>
#include <linux/printk.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netfilter/xt_l2ogre_tg.h>
#include <linux/if_vlan.h>
#include <net/gre.h>

/* Sentinel dscp values recognised by xt_l2ogre_update_dscp(): */
/* derive the DSCP from the 802.1Q priority bits found at the skb MAC header */
#define DSCP_POLICY_AUTOMARKING	(-2)
/* copy the DSCP from the inner IPv4/IPv6 header */
#define DSCP_POLICY_INHERIT	(-1)
/* bit shift between a DS field (TOS / traffic class) byte and its DSCP value */
#define DSCP_TOS_SHIFT		2
#define DSCP_MASK		0xfc	/* 11111100 */

/* #define XT_L2OGRE_TG_DEBUG */
#ifdef XT_L2OGRE_TG_DEBUG
/*
 * xt_l2ogre_dump - debug helper, only built when XT_L2OGRE_TG_DEBUG is defined.
 * @skb: buffer to describe
 *
 * Logs the skb geometry (head/data pointers, lengths, outer and inner header
 * offsets), its VLAN/encapsulation metadata and a hex dump of skb->len bytes
 * starting at skb->data.
 *
 * Pointers are printed with %p rather than being cast to unsigned int, which
 * truncates them (and triggers a compiler warning) on 64-bit builds.
 */
static void
xt_l2ogre_dump(const struct sk_buff *skb)
{
	pr_err("xt_l2ogre_dump: head=%p, len=%u, data=%p, data_len=%u, mac_len=%d hdr_len=%d\n"
	       "      (outter)  proto=%x, tp_hdr=%d, net_hdr=%d, mac_hdr=%d,\n"
	       "       (inner)  proto=%x, tp_hdr=%d, net_hdr=%d, mac_hdr=%d\n",
	       skb->head, skb->len, skb->data,
	       skb->data_len, skb->mac_len, skb->hdr_len,
	       (unsigned int)skb->protocol, skb->transport_header,
	       skb->network_header, skb->mac_header,
	       (unsigned int)skb->inner_protocol, skb->inner_transport_header,
	       skb->inner_network_header, skb->inner_mac_header);

	pr_err("xt_l2ogre_dump: vlan_proto=0x%04x, vlan_tci=0x%04x, encapsulation=%d\n",
	       (unsigned int)skb->vlan_proto, (unsigned int)skb->vlan_tci,
	       skb->encapsulation);

	print_hex_dump(KERN_ERR, "xt_l2ogre_dump: ", DUMP_PREFIX_ADDRESS, 16, 1,
		       skb->data, skb->len, false);
}
#endif

/*
 * xt_l2ogre_remove_vlan_tag - strip the 802.1Q tag of the inner Ethernet
 * frame carried in a GRE (ETH_P_TEB) packet.
 * @is_ipv4: non-zero when the outer header is IPv4, zero for IPv6
 * @skb:     packet; skb->data must point at the outer IP header
 * @vlan_id: VLAN ID the inner tag must carry for it to be removed
 *
 * The outer IP + GRE headers and the two inner MAC addresses are moved
 * VLAN_HLEN bytes towards the payload, the first VLAN_HLEN bytes are pulled,
 * and the outer IP length, IPv4 header checksum and (when present) GRE
 * checksum are updated to match.
 *
 * Every header is length-checked with pskb_may_pull() before it is read, so
 * a truncated packet is rejected instead of being read out of bounds.
 *
 * NOTE(review): only network_header, mac_header and transport_header are
 * shifted afterwards; the inner_* offsets are left untouched, as before.
 *
 * Return: true when the tag was removed, false when the packet is not a
 * matching tagged GRE-over-Ethernet packet or is too short.
 */
static int
xt_l2ogre_remove_vlan_tag(int is_ipv4, struct sk_buff *skb, __u16 vlan_id)
{
	/* IP parameters */
	struct iphdr *iph = NULL;
	struct ipv6hdr *ipv6h = NULL;
	unsigned int ip_hlen;	/* ipv6 or ipv4 header length */
	__be16 ip_len;		/* ipv6 or ipv4 total length */

	/* GRE parameters */
	struct gre_base_hdr *greh;
	unsigned int greh_optlen = 0;

	/* VLAN parameters */
	struct vlan_ethhdr *vlan_eh;
	__be32 vlan_tag;

	unsigned int inner_eth_off;	/* offset of the inner Ethernet header */
	unsigned int move_len;	/* length to be moved when removing VLAN tag */

	if (is_ipv4) {
		iph = ip_hdr(skb);
		ip_hlen = (iph->ihl * 4);
		/* basic check that this is a gre packet */
		if (iph->protocol != IPPROTO_GRE) {
			pr_err("xt_l2ogre_remove_vlan_tag: NOT a GRE packet\n");
			return false;
		}
	} else {
		__u8 nexthdr;
		__be16 frag_off;
		int off;

		ipv6h = ipv6_hdr(skb);
		nexthdr = ipv6h->nexthdr;
		off = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
				       &nexthdr, &frag_off);
		/* a negative offset means the extension headers are broken */
		if (off < 0 || nexthdr != IPPROTO_GRE) {
			pr_err("xt_l2ogre_remove_vlan_tag: NOT a GRE packet\n");
			return false;
		}
		ip_hlen = off;
	}

	/* the base GRE header must be present before its fields are read */
	if (!pskb_may_pull(skb, ip_hlen + sizeof(struct gre_base_hdr))) {
		pr_warn("xt_l2ogre_remove_vlan_tag: PULL SKB - pskb_may_pull failed\n");
		return false;
	}

	greh = (struct gre_base_hdr *)(skb->data + ip_hlen);
	/* make sure this is gretap 0x6558 for the ethernet type */
	if (greh->protocol != __cpu_to_be16(ETH_P_TEB)) {
		pr_err("xt_l2ogre_remove_vlan_tag: NOT a GRE over Ethernet packet\n");
		return false;
	}

	if (greh->flags & GRE_CSUM)
		greh_optlen += 4;
	if (greh->flags & GRE_KEY)
		greh_optlen += 4;
	if (greh->flags & GRE_SEQ)
		greh_optlen += 4;

	inner_eth_off = ip_hlen + sizeof(struct gre_base_hdr) + greh_optlen;

	/* GRE options plus a complete VLAN-tagged Ethernet header are needed */
	if (!pskb_may_pull(skb, inner_eth_off + VLAN_ETH_HLEN)) {
		pr_warn("xt_l2ogre_remove_vlan_tag: PULL SKB - pskb_may_pull failed\n");
		return false;
	}

	/* pskb_may_pull() may reallocate the head: reload every pointer */
	if (is_ipv4)
		iph = ip_hdr(skb);
	else
		ipv6h = ipv6_hdr(skb);
	greh = (struct gre_base_hdr *)(skb->data + ip_hlen);
	vlan_eh = (struct vlan_ethhdr *)(skb->data + inner_eth_off);

	if (vlan_eh->h_vlan_proto != __cpu_to_be16(ETH_P_8021Q)) {
		pr_warn("xt_l2ogre_remove_vlan_tag: NOT a 8021Q packet\n");
		return false;
	}
	if ((vlan_eh->h_vlan_TCI & __cpu_to_be16(VLAN_VID_MASK)) !=
		__cpu_to_be16(vlan_id)) {
		pr_warn("xt_l2ogre_remove_vlan_tag: NOT same VLAN ID %d\n",
			vlan_id);
		return false;
	}

	/* TPID + TCI exactly as they appear on the wire */
	vlan_tag = *(__be32 *)(&vlan_eh->h_vlan_proto);
	/* Outer IP header + GRE header + 2 ethernet addresses from Inner
	 * Ethernet header */
	move_len = inner_eth_off + (2 * ETH_ALEN);

	/* Update the IP header length and IP header checksum */
	if (is_ipv4) {
		/* Update IP total len */
		ip_len = iph->tot_len;
		iph->tot_len = __cpu_to_be16(__be16_to_cpu(ip_len) - VLAN_HLEN);

		/* Update IPv4 Checksum */
		csum_replace2(&iph->check, ip_len, iph->tot_len);
	} else {
		/* Update IPv6 payload len */
		ip_len = ipv6h->payload_len;
		ipv6h->payload_len = __cpu_to_be16(__be16_to_cpu(ip_len) -
			VLAN_HLEN);
	}

	/* Update GRE Checksum */
	if (greh->flags & GRE_CSUM) {
		/* the checksum word directly follows the base GRE header */
		__sum16 *p_greh_csum = (__sum16 *)(greh + 1);

		/* one's-complement removal of the four tag bytes; csum_sub()
		 * handles the end-around borrow correctly */
		*p_greh_csum = csum_fold(csum_sub(~csum_unfold(*p_greh_csum),
						  (__force __wsum)vlan_tag));
	}

	/* Move the IP header and GRE header and part of the inner Ethernet
	 * header to the beginning of the new header. */
	memmove(skb->data + VLAN_HLEN, skb->data, move_len);
	skb_pull(skb, VLAN_HLEN);

	/* adjust the moved header */
	skb->network_header += VLAN_HLEN;
	skb->mac_header += VLAN_HLEN;
	skb->transport_header += VLAN_HLEN;

	return true;
}

/*
 * xt_l2ogre_insert_vlan_tag - add an 802.1Q tag to the inner Ethernet frame
 * of a GRE-encapsulated packet.
 * @is_ipv4:  non-zero when the outer header is IPv4, zero for IPv6
 * @skb:      packet; skb->data must point at the outer IP header and
 *            skb_inner_mac_header() at the inner Ethernet header
 * @vlan_tag: host-order tag, TPID in the upper 16 bits and TCI in the lower
 *
 * Grows the packet by VLAN_HLEN at the front, moves the outer IP + GRE
 * headers and the two inner MAC addresses down, writes the tag into the gap
 * and fixes up the outer IP length, IPv4 header checksum and (when present)
 * GRE checksum.
 *
 * The outer IP header length is resolved before the skb is modified, so a
 * malformed IPv6 extension-header chain leaves the packet untouched.
 *
 * Return: true on success, false when headroom could not be obtained or the
 * IPv6 extension headers could not be parsed.
 */
static int
xt_l2ogre_insert_vlan_tag(int is_ipv4, struct sk_buff *skb, __u32 vlan_tag)
{
	/* IP parameters */
	unsigned int ip_hlen;	/* ipv6 or ipv4 header length */
	__be16 ip_len;		/* ipv6 or ipv4 total length */

	/* GRE parameters */
	struct gre_base_hdr *greh;

	/* VLAN parameters */
	struct vlan_ethhdr *vlan_eh;

	unsigned int move_len;	/* length to be moved when inserting VLAN tag */

	if (is_ipv4) {
		ip_hlen = (ip_hdr(skb)->ihl * 4);
	} else {
		__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
		__be16 frag_off;
		int off;

		off = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
				       &nexthdr, &frag_off);
		if (off < 0) {
			pr_err("xt_l2ogre_insert_vlan_tag: malformed IPv6 extension headers\n");
			return false;
		}
		ip_hlen = off;
	}

	/* Outer IP header + GRE header + 2 ethernet addresses from Inner
	 * Ethernet header */
	move_len = skb_inner_mac_header(skb) - skb->data + (2 * ETH_ALEN);

	/* make sure there is enough room in the sk_buff */
	if (skb_headroom(skb) < VLAN_HLEN) {
		/* Expand SKB */
		if (pskb_expand_head(skb, VLAN_HLEN, 0, GFP_ATOMIC)) {
			pr_err("xt_l2ogre_insert_vlan_tag: Expand SKB - pskb_expand_head failed\n");
			return false;
		}
	}

	/* make room for the VLAN tag */
	skb_push(skb, VLAN_HLEN);
	/* Move the IP header and GRE header and part of the inner Ethernet
	 * header to the beginning of the new header. */
	memmove(skb->data, skb->data + VLAN_HLEN, move_len);

	/* adjust the moved header */
	skb->network_header -= VLAN_HLEN;
	skb->mac_header -= VLAN_HLEN;
	skb->inner_mac_header -= VLAN_HLEN;

	/* populate the VLAN tag */
	vlan_eh = vlan_eth_hdr(skb);
	vlan_eh->h_vlan_proto = __cpu_to_be16(vlan_tag >> 16);
	vlan_eh->h_vlan_TCI = __cpu_to_be16(vlan_tag & 0xffff);

	/* Update the IP header length */
	if (is_ipv4) {
		struct iphdr *iph = ip_hdr(skb);

		/* Update IP total len */
		ip_len = iph->tot_len;
		iph->tot_len = __cpu_to_be16(__be16_to_cpu(ip_len) + VLAN_HLEN);

		/* Update IPv4 Checksum */
		csum_replace2(&iph->check, ip_len, iph->tot_len);
	} else {
		struct ipv6hdr *ipv6h = ipv6_hdr(skb);

		/* Update IPv6 payload len */
		ip_len = ipv6h->payload_len;
		ipv6h->payload_len = __cpu_to_be16(__be16_to_cpu(ip_len) +
			VLAN_HLEN);
	}

	/* Update GRE Checksum */
	greh = (struct gre_base_hdr *)(skb->data + ip_hlen);
	if (greh->flags & GRE_CSUM) {
		/* the checksum word directly follows the base GRE header */
		__sum16 *p_greh_csum = (__sum16 *)(greh + 1);

		/* The checksum is summed over the bytes as stored in the
		 * packet, so the addend must be the network-order tag, not
		 * the host-order argument (they differ on little-endian). */
		*p_greh_csum = csum_fold(csum_add(~csum_unfold(*p_greh_csum),
				(__force __wsum)__cpu_to_be32(vlan_tag)));
	}

	return true;
}

/*
 * xt_l2ogre_update_dscp - set the DSCP of the outer IP header.
 * @is_ipv4: non-zero when the outer header is IPv4, zero for IPv6
 * @skb:     packet whose outer DS field is rewritten
 * @dscp:    DSCP value to apply, or one of the policy sentinels:
 *           DSCP_POLICY_AUTOMARKING - map the 802.1Q priority found at the
 *           skb MAC header through the table below (0 when untagged);
 *           DSCP_POLICY_INHERIT - copy the DSCP of the inner IPv4/IPv6
 *           header (0 for any other inner ethertype).
 *
 * The DS field is only written when the resulting DSCP differs from the
 * current one; the two low-order bits of the field are preserved.
 *
 * Return: always true.
 */
static int
xt_l2ogre_update_dscp(int is_ipv4, struct sk_buff *skb, __s8 dscp)
{
	/* Default Layer 2 to Layer 3 QoS mapping (TR-181 Annex 2) used for
	 * automatic marking; indexed by the VLAN priority code point. */
	static const __u8 pcp_to_dscp[] = {
		0, 0, 0, 0x8, 0x10, 0x18, 0x28, 0x38
	};
	struct vlan_ethhdr *eh;
	__u16 payload_type;

	switch (dscp) {
	case DSCP_POLICY_AUTOMARKING:
		eh = vlan_eth_hdr(skb);
		if (eh->h_vlan_proto != __cpu_to_be16(ETH_P_8021Q)) {
			/* there is no VLAN tag. */
			dscp = 0;
		} else {
			__u8 pcp = (__be16_to_cpu(eh->h_vlan_TCI)
				& VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;

			dscp = pcp_to_dscp[pcp];
		}
		break;

	case DSCP_POLICY_INHERIT:
		eh = vlan_eth_hdr(skb);
		/* look past the VLAN tag, if any, for the real ethertype */
		if (eh->h_vlan_proto != __cpu_to_be16(ETH_P_8021Q))
			payload_type = __be16_to_cpu(eh->h_vlan_proto);
		else
			payload_type = __be16_to_cpu(
				eh->h_vlan_encapsulated_proto);

		switch (payload_type) {
		case ETH_P_IP: {
			struct iphdr *inner4 = (struct iphdr *)
				skb_inner_network_header(skb);

			dscp = ipv4_get_dsfield(inner4) >> DSCP_TOS_SHIFT;
			break;
		}
		case ETH_P_IPV6: {
			struct ipv6hdr *inner6 = (struct ipv6hdr *)
				skb_inner_network_header(skb);

			dscp = ipv6_get_dsfield(inner6) >> DSCP_TOS_SHIFT;
			break;
		}
		default:
			dscp = 0;
			break;
		}
		break;

	default:
		/* explicit DSCP value: use it as given */
		break;
	}

	/* rewrite the outer DS field only when the DSCP actually changes */
	if (!is_ipv4) {
		struct ipv6hdr *outer6 = ipv6_hdr(skb);

		if ((ipv6_get_dsfield(outer6) >> DSCP_TOS_SHIFT) != dscp)
			ipv6_change_dsfield(outer6, (__u8)(~DSCP_MASK),
					    (__u8)(dscp << DSCP_TOS_SHIFT));
	} else {
		struct iphdr *outer4 = ip_hdr(skb);

		if ((ipv4_get_dsfield(outer4) >> DSCP_TOS_SHIFT) != dscp)
			ipv4_change_dsfield(outer4, (__u8)(~DSCP_MASK),
					    (__u8)(dscp << DSCP_TOS_SHIFT));
	}

	return true;
}

/*
 * xt_l2ogre_tg - xtables target entry point for "L2OGRE".
 * @skb: packet being processed
 * @par: target parameters; par->targinfo is a struct xt_l2ogre_tginfo
 *
 * Makes the whole packet writable, then applies, in this order and only when
 * the matching flag is set: inner VLAN tag removal, inner VLAN tag insertion
 * and outer DSCP rewriting. Processing stops at the first step that fails.
 *
 * skb_ensure_writable() returns 0 on success and a negative errno on
 * failure, so the packet is dropped when the call returns non-zero.
 *
 * Return: NF_ACCEPT when every requested step succeeded, NF_DROP otherwise.
 */
static unsigned int
xt_l2ogre_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
	const struct xt_l2ogre_tginfo *info = par->targinfo;
	int is_ipv4 = (par->state->pf == PF_INET);

	/* Make skb writable first */
	if (skb_ensure_writable(skb, skb->len))
		return NF_DROP;

	if ((info->flags & XT_L2OGRE_TG_VLAN_UNTAG) &&
	    !xt_l2ogre_remove_vlan_tag(is_ipv4, skb, info->vlan_untag))
		return NF_DROP;

	if ((info->flags & XT_L2OGRE_TG_VLAN_TAG) &&
	    !xt_l2ogre_insert_vlan_tag(is_ipv4, skb, info->vlan_tag))
		return NF_DROP;

	if ((info->flags & XT_L2OGRE_TG_DSCP_POLICY) &&
	    !xt_l2ogre_update_dscp(is_ipv4, skb, info->dscp))
		return NF_DROP;

	return NF_ACCEPT;
}

/*
 * xt_l2ogre_tg_check - checkentry callback of the L2OGRE target.
 * @par: rule check parameters (unused)
 *
 * Performs no validation of the rule.
 *
 * Return: always 0.
 */
static int xt_l2ogre_tg_check(const struct xt_tgchk_param *par)
{
	return 0;
}

/*
 * xt_l2ogre_tg_destroy - destroy callback of the L2OGRE target.
 * @par: rule teardown parameters (unused)
 *
 * Intentionally empty: the function body releases nothing.
 */
void xt_l2ogre_tg_destroy(const struct xt_tgdtor_param *par)
{
}

/*
 * Registration table for the "L2OGRE" target, revision 0. One entry per
 * address family, each restricted to GRE packets on the LOCAL_IN and
 * LOCAL_OUT hooks. The IPv6 entry is only built when ip6tables support is
 * enabled.
 */
static struct xt_target xt_l2ogre_tg_reg[] __read_mostly = {
	{
		/* IPv4 */
		.name		= "L2OGRE",
		.revision	= 0,
		.family		= NFPROTO_IPV4,
		.proto		= IPPROTO_GRE,
		.hooks		= (1 << NF_INET_LOCAL_IN) |
				  (1 << NF_INET_LOCAL_OUT),
		.target		= xt_l2ogre_tg,
		.targetsize	= sizeof(struct xt_l2ogre_tginfo),
		.checkentry	= xt_l2ogre_tg_check,
		.destroy	= xt_l2ogre_tg_destroy,
		.me		= THIS_MODULE,
	},
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
	{
		/* IPv6 */
		.name		= "L2OGRE",
		.revision	= 0,
		.family		= NFPROTO_IPV6,
		.proto		= IPPROTO_GRE,
		.hooks		= (1 << NF_INET_LOCAL_IN) |
				  (1 << NF_INET_LOCAL_OUT),
		.target		= xt_l2ogre_tg,
		.targetsize	= sizeof(struct xt_l2ogre_tginfo),
		.checkentry	= xt_l2ogre_tg_check,
		.destroy	= xt_l2ogre_tg_destroy,
		.me		= THIS_MODULE,
	},
#endif
};

/*
 * xt_l2ogre_tg_init - register every entry of xt_l2ogre_tg_reg with xtables.
 *
 * Return: the result of xt_register_targets().
 */
int __init xt_l2ogre_tg_init(void)
{
	const unsigned int n_targets = ARRAY_SIZE(xt_l2ogre_tg_reg);

	return xt_register_targets(xt_l2ogre_tg_reg, n_targets);
}

/*
 * xt_l2ogre_tg_finish - unregister every entry of xt_l2ogre_tg_reg from
 * xtables.
 */
void __exit xt_l2ogre_tg_finish(void)
{
	const unsigned int n_targets = ARRAY_SIZE(xt_l2ogre_tg_reg);

	xt_unregister_targets(xt_l2ogre_tg_reg, n_targets);
}
