 /****************************************************************************
 *
 * Copyright (c) 2015 Broadcom Corporation
 *
 * Unless you and Broadcom execute a separate written software license
 * agreement governing use of this software, this software is licensed to
 * you under the terms of the GNU General Public License version 2 (the
 * "GPL"), available at [http://www.broadcom.com/licenses/GPLv2.php], with
 * the following added to such license:
 *
 * As a special exception, the copyright holders of this software give you
 * permission to link this software with independent modules, and to copy
 * and distribute the resulting executable under terms of your choice,
 * provided that you also meet, for each linked independent module, the
 * terms and conditions of the license of that module. An independent
 * module is a module which is not derived from this software. The special
 * exception does not apply to any modifications of the software.
 *
 * Notwithstanding the above, under no circumstances may you combine this
 * software in any way with any other Broadcom software provided under a
 * license other than the GPL, without Broadcom's express prior written
 * consent.
 *
 ****************************************************************************
 * Author: Jayesh Patel <jayeshp@broadcom.com>
 *****************************************************************************/

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/rculist_nulls.h>
#include <linux/ip.h>
#include <linux/version.h>
#include <net/route.h>
#include <net/dst.h>
#include <linux/inetdevice.h>
#include <linux/netfilter_bridge.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_nat.h>

/*
 * http://www.rfc-base.org/txt/rfc-4787.txt
 * 6.  Hairpinning Behavior
 *
 *    If two hosts (called X1 and X2) are behind the same NAT and
 *    exchanging traffic, the NAT may allocate an address on the outside of
 *    the NAT for X2, called X2':x2'.  If X1 sends traffic to X2':x2', it
 *    goes to the NAT, which must relay the traffic from X1 to X2.  This is
 *    referred to as hairpinning and is illustrated below.
 *
 *    +----+ from X1:x1 to X2':x2'   +-----+ X1':x1'
 *    | X1 |>>>>>>>>>>>>>>>>>>>>>>>>>>>>>--+---
 *    +----+                         |  v  |
 *                                   |  v  |
 *                                   |  v  |
 *                                   |  v  |
 *    +----+ from X1':x1' to X2:x2   |  v  | X2':x2'
 *    | X2 |<<<<<<<<<<<<<<<<<<<<<<<<<<<<<--+---
 *    +----+                         +-----+
 *
 *                            Hairpinning Behavior
 *
 *    Hairpinning allows two endpoints on the internal side of the NAT to
 *    communicate even if they only use each other's external IP addresses
 *    and ports.
 *
 *    More formally, a NAT that supports hairpinning forwards packets
 *    originating from an internal address, X1:x1, destined for an external
 *    address X2':x2' that has an active mapping to an internal address
 *    X2:x2, back to that internal address, X2:x2.  Note that typically X1'
 *    is the same as X2'.
 *
 *    Furthermore, the NAT may present the hairpinned packet with either an
 *    internal (X1:x1) or an external (X1':x1') source IP address and port.
 *    Therefore, the hairpinning NAT behavior can be either "External
 *    source IP address and port" or "Internal source IP address and port".
 *    "Internal source IP address and port" may cause problems by confusing
 *    implementations that expect an external IP address and port.
 *
 *    REQ-9:  A NAT MUST support "Hairpinning".
 *
 *       a) A NAT Hairpinning behavior MUST be "External source IP address
 *          and port".
 *
 *    Justification:  This requirement is to allow communications between
 *       two endpoints behind the same NAT when they are trying each
 *       other's external IP addresses.
 *
 *       a) Using the external source IP address is necessary for
 *          applications with a restrictive policy of not accepting packets
 *          from IP addresses that differ from what is expected.
 *
 *
 * http://www.rfc-base.org/txt/rfc-5382.txt
 * 7.2.  Hairpinning Behavior
 *
 *   NATs that forward packets originating from an internal address,
 *   destined for an external address that matches the active mapping for
 *   an internal address, back to that internal address are defined in
 *   [BEHAVE-UDP] as supporting "hairpinning".  If the NAT presents the
 *   hairpinned packet with an external source IP address and port (i.e.,
 *   the mapped source address and port of the originating internal
 *   endpoint), then it is defined to have "External source IP address and
 *   port" for hairpinning.  Hairpinning is necessary to allow two
 *   internal endpoints (known to each other only by their external mapped
 *   addresses) to communicate with each other.  "External source IP
 *   address and port" behavior for hairpinning avoids confusing
 *   implementations that expect the external source IP address and port.
 *
 *   REQ-8:  A NAT MUST support "hairpinning" for TCP.
 *      a) A NAT's hairpinning behavior MUST be of type "External source
 *         IP address and port".
 *
 *   Justification:  This requirement allows two applications behind the
 *      same NAT that are trying to communicate with each other using
 *      their external addresses.
 *      a) Using the external source address and port for the hairpinned
 *         packet is necessary for applications that do not expect to
 *         receive a packet from a different address than the external
 *         address they are trying to communicate with.
 */

/*
 * Build-time selector for the hook implementation in this file:
 *   non-zero - a single NF_INET_LOCAL_IN hook (nf_inet_local_in)
 *   zero     - a NF_INET_PRE_ROUTING / NF_INET_POST_ROUTING hook pair
 */
#define NEW_CT_4_LOOPBACK 1

/*
 * The following setting is required for this module to work:
 *   echo 1 > /proc/sys/net/ipv4/conf/wanbridge/accept_local
 */

static
void nat_find_loopback_cts(struct net *net,
			   struct nf_conntrack_tuple *tuple,
			   struct nf_conn *ctignore,
			   struct nf_conn **ct1,
			   struct nf_conn **ct2)
{
	unsigned int bucket;
	struct hlist_nulls_node *n;
	struct nf_conntrack_tuple_hash *hash;
	struct nf_conn *ct;

	for (bucket = 0; bucket < nf_conntrack_htable_size; bucket++) {
		rcu_read_lock();
		n = rcu_dereference(
		   hlist_nulls_first_rcu(&nf_conntrack_hash[bucket]));

		while (!is_a_nulls(n)) {

			hash = (struct nf_conntrack_tuple_hash *)n;
			ct = nf_ct_tuplehash_to_ctrack(hash);

			if (ct == ctignore)
				goto next_ct;

			if (!(ct->status & IPS_DST_NAT_DONE))
				goto next_ct;

			if (__nf_ct_tuple_src_equal(tuple,
				&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) {
				*ct1 = ct;
				goto next_ct;
			}
			if (__nf_ct_tuple_dst_equal(tuple,
				&ct->tuplehash[IP_CT_DIR_REPLY].tuple)) {
				*ct2 = ct;
				goto next_ct;
			}
next_ct:
			n = rcu_dereference(hlist_nulls_next_rcu(n));
		}
		rcu_read_unlock();
	}
}

static void dump_tuple(char *prefix, const struct nf_conntrack_tuple *tuple)
{
	pr_debug("%s: src=%pI4 dst=%pI4 sport=%hu dport=%hu\n", prefix,
		 &tuple->src.u3.ip, &tuple->dst.u3.ip,
		 ntohs(tuple->src.u.tcp.port),
		 ntohs(tuple->dst.u.tcp.port));
}

#if NEW_CT_4_LOOPBACK
/*
 * nf_inet_local_in - NF_INET_LOCAL_IN hook implementing NAT hairpinning.
 * @priv:  hook private data (unused)
 * @skb:   packet about to be delivered locally
 * @state: hook state; state->in is the ingress device
 *
 * Searches the conntrack table for two DNAT'ed entries (see
 * nat_find_loopback_cts()): ctsrc, matching the packet's source, and ctdst,
 * matching its destination.  When both exist and carry the packet's
 * protocol, the packet source is rewritten from ctsrc and the destination
 * from ctdst, the packet's own conntrack entry is marked as NAT-done and (for
 * the original direction) its reply tuple is rebuilt from the two entries.
 * The skb is then moved to the device owning the address taken from ctsrc,
 * re-routed and handed to the route's output function.
 *
 * Return: NF_STOLEN when the packet was re-routed and passed to the output
 * function; NF_ACCEPT in every other case.
 */
static
unsigned int nf_inet_local_in(void *priv,
			      struct sk_buff *skb,
			      const struct nf_hook_state *state)
{
	struct nf_conntrack_tuple tuple;
	const struct nf_nat_hook *nat_hook;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_tuple target;
	struct nf_conn *ctsrc = NULL;
	struct nf_conn *ctdst = NULL;
	struct nf_conn *ct = NULL;
	u_int8_t protonum;
	struct iphdr *iph;
	struct net_device *new_in;
	struct net_device *in = state->in;

	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
			       PF_INET, dev_net(in),
			       &tuple)) {
		pr_err("Error nf_ct_get_tuple\n");
		return NF_ACCEPT;
	}

	/*
	 * Protocol of the packet itself; both conntrack entries must carry
	 * the same one.  (Previously this variable was compared without ever
	 * being assigned.)
	 */
	protonum = tuple.dst.protonum;

	ct = nf_ct_get(skb, &ctinfo);

	if (!ct)
		return NF_ACCEPT;

	nat_find_loopback_cts(dev_net(in), &tuple, ct, &ctsrc, &ctdst);

	if (!ctsrc || !ctdst)
		return NF_ACCEPT;

	iph = ip_hdr(skb);

	pr_debug("Before src=%pI4 dst=%pI4 pkt_type=%d dev=%s ct=%px %px %px\n",
		 &(iph->saddr), &(iph->daddr), skb->pkt_type,
		 skb->dev->name, ct, ctsrc, ctdst);

	dump_tuple("tuple     ", &tuple);
	dump_tuple("ctsrc-orig",
		   &ctsrc->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	dump_tuple("ctsrc-repl",
		   &ctsrc->tuplehash[IP_CT_DIR_REPLY].tuple);
	dump_tuple("ctdst-orig",
		   &ctdst->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	dump_tuple("ctdst-repl",
		   &ctdst->tuplehash[IP_CT_DIR_REPLY].tuple);

	/*
	 * Validate both entries before touching the packet, so that a
	 * mismatch never leaves it half-translated.
	 */
	nf_ct_invert_tuple(&target,
			   &ctsrc->tuplehash[IP_CT_DIR_REPLY].tuple);

	if (target.src.l3num != PF_INET)
		return NF_ACCEPT;

	if (target.dst.protonum != protonum)
		return NF_ACCEPT;

	/* Device that owns the address the source will be rewritten to. */
	new_in = __ip_dev_find(dev_net(in), target.src.u3.in.s_addr, false);

	if (!new_in)
		return NF_ACCEPT;

	nf_ct_invert_tuple(&target,
			   &ctdst->tuplehash[IP_CT_DIR_ORIGINAL].tuple);

	if (target.src.l3num != PF_INET)
		return NF_ACCEPT;

	if (target.dst.protonum != protonum)
		return NF_ACCEPT;

	nat_hook = rcu_dereference(nf_nat_hook);
	if (!nat_hook)
		return NF_ACCEPT;	/* no NAT core available: leave packet alone */

	/* Update Source IP and port */
	if (!nat_hook->manip_pkt(skb, ctsrc, NF_NAT_MANIP_SRC, IP_CT_DIR_ORIGINAL))
		return NF_ACCEPT;

	/* Update Destination IP and port */
	if (!nat_hook->manip_pkt(skb, ctdst, NF_NAT_MANIP_DST, IP_CT_DIR_REPLY))
		return NF_ACCEPT;

	/*
	 * manip_pkt() may have to make the header writable, which can move
	 * the packet data; the header pointer taken above must be refreshed.
	 */
	iph = ip_hdr(skb);

	ct->status &= ~(IPS_NAT_MASK);
	ct->status |= (IPS_NAT_DONE_MASK);
	skb->dev = new_in;
	skb->skb_iif = new_in->ifindex;
#ifdef CONFIG_NF_CONNTRACK_OFFLOAD
	skb->dev_in = new_in;
#endif

	/*
	 * For the first direction, make the packet's own entry expect replies
	 * that come from ctdst's original source and go to ctsrc's reply
	 * destination.
	 */
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src
			= ctdst->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src;
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst
			= ctsrc->tuplehash[IP_CT_DIR_REPLY].tuple.dst;
	}

	pr_debug("After src=%pI4 dst=%pI4 pkt_type=%d dev=%s ct=%px\n",
		 &(iph->saddr), &(iph->daddr), skb->pkt_type,
		 skb->dev->name, ct);

	/* ip_route_input() returns 0 on success. */
	if (!ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, skb->dev)) {
		skb_dst(skb)->output(dev_net(in), skb->sk, skb);
		return NF_STOLEN;
	}
	return NF_ACCEPT;
}
#else
/*

Modify nf_conntrack_nat_help in nf_nat.h as follows:

#if defined(CONFIG_NF_NAT_LOOPBACK) || defined(CONFIG_NF_NAT_LOOPBACK_MODULE)
	struct nf_conntrack_tuple loopback_tuple;
#endif

*/
/*
 * nf_inet_pre_routing - PRE_ROUTING half of the two-hook hairpin variant.
 * @ops:  hook registration this call belongs to (unused)
 * @skb:  packet being received
 * @in:   ingress device
 * @out:  egress device (unused)
 * @okfn: continuation function (unused)
 *
 * Only TCP and UDP packets are handled.  When matching conntrack entries are
 * found for both the packet's source and destination (see
 * nat_find_loopback_cts()), the packet source is rewritten from ctdst's reply
 * tuple, the skb is moved to the device owning the destination address and
 * its Ethernet addresses are rewritten.  ctsrc's reply tuple is then stored
 * in the NAT extension of the conntrack entry matching the rewritten packet,
 * for nf_inet_post_routing() to consume.
 *
 * Return: always NF_ACCEPT.
 */
static
unsigned int nf_inet_pre_routing(const struct nf_hook_ops *ops,
				 struct sk_buff *skb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	const struct nf_nat_l3proto *l3nat;
	const struct nf_nat_l4proto *l4nat;
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conntrack_tuple target;
	unsigned int dataoff;
	struct nf_conn *ctsrc = NULL;
	struct nf_conn *ctdst = NULL;
	struct nf_conn *ct = NULL;
	u_int8_t protonum;
	struct iphdr *iph;
	struct net_device *new_in;
	struct nf_conn_nat *nat;
	int ret;

	l3proto = __nf_ct_l3proto_find(PF_INET);
	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
				   &dataoff, &protonum);
	if (ret <= 0)
		return NF_ACCEPT;

	if ((protonum != IPPROTO_UDP) && (protonum != IPPROTO_TCP))
		return NF_ACCEPT;

	l4proto = __nf_ct_l4proto_find(PF_INET, protonum);

	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
			     dataoff, PF_INET, protonum, &tuple, l3proto,
			     l4proto)) {
		pr_err("Error nf_ct_get_tuple\n");
		return NF_ACCEPT;
	}

	/* ct is still NULL here, so no entry is excluded from the search. */
	nat_find_loopback_cts(dev_net(in), &tuple, ct, &ctsrc, &ctdst);

	if (!ctsrc || !ctdst)
		return NF_ACCEPT;

	iph = ip_hdr(skb);

	pr_debug("Before src=%pI4 dst=%pI4 pkt_type=%d dev=%s ct=%px %px %px\n",
		 &(iph->saddr), &(iph->daddr), skb->pkt_type,
		 skb->dev->name, ct, ctsrc, ctdst);

	dump_tuple("tuple     ", &tuple);
	dump_tuple("ctsrc-orig",
		   &ctsrc->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	dump_tuple("ctsrc-repl",
		   &ctsrc->tuplehash[IP_CT_DIR_REPLY].tuple);
	dump_tuple("ctdst-orig",
		   &ctdst->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	dump_tuple("ctdst-repl",
		   &ctdst->tuplehash[IP_CT_DIR_REPLY].tuple);
	memcpy(&target,
	       &ctdst->tuplehash[IP_CT_DIR_REPLY].tuple,
	       sizeof(tuple));
	l3nat = __nf_nat_l3proto_find(target.src.l3num);
	l4nat = __nf_nat_l4proto_find(target.src.l3num,
				      target.dst.protonum);
	/* Update Source IP and port */
	if (!l3nat->manip_pkt(skb, 0, l4nat, &target, NF_NAT_MANIP_SRC))
		return NF_ACCEPT;

	/*
	 * manip_pkt() may have moved the packet data while making it
	 * writable; refresh the header pointer before using it again.
	 */
	iph = ip_hdr(skb);

	new_in = __ip_dev_find(dev_net(in), iph->daddr, false);
	if (!new_in)
		return NF_ACCEPT;	/* no local device owns the destination */

	skb->dev = new_in;
	skb->skb_iif = new_in->ifindex;
	memcpy(eth_hdr(skb)->h_dest, new_in->dev_addr, ETH_ALEN);
	memcpy(eth_hdr(skb)->h_source, in->dev_addr, ETH_ALEN);

	/* Re-extract the tuple: it now reflects the rewritten source. */
	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
			     dataoff, PF_INET, protonum, &tuple, l3proto,
			     l4proto)) {
		pr_err("Error nf_ct_get_tuple\n");
		return NF_ACCEPT;
	}
	h = nf_conntrack_find_get(dev_net(in), NF_CT_DEFAULT_ZONE,
				  &tuple);

	/* The lookup result must be checked before it is converted. */
	if (!h) {
		pr_err("Error nf_ct_get\n");
		return NF_ACCEPT;
	}
	ct = nf_ct_tuplehash_to_ctrack(h);

	pr_debug("After src=%pI4 dst=%pI4 pkt_type=%d dev=%s ct=%px\n",
		 &(iph->saddr), &(iph->daddr), skb->pkt_type,
		 skb->dev->name, ct);

	nat = nfct_nat(ct);
	if (!nat) {
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			goto out_put;
		}
	}
	memcpy(&nat->help.loopback_tuple,
	       &ctsrc->tuplehash[IP_CT_DIR_REPLY].tuple,
	       sizeof(tuple));

out_put:
	/* Drop the reference taken by nf_conntrack_find_get(). */
	nf_ct_put(ct);
	return NF_ACCEPT;
}

/*
 * nf_inet_post_routing - POST_ROUTING half of the two-hook hairpin variant.
 * @ops:  hook registration this call belongs to (unused)
 * @skb:  packet being transmitted
 * @in:   ingress device (unused)
 * @out:  egress device (unused)
 * @okfn: continuation function (unused)
 *
 * If the packet's conntrack entry carries a loopback tuple in its NAT
 * extension (a non-zero dst.protonum marks it as set), the packet source is
 * rewritten from the inverse of that tuple.
 *
 * Return: always NF_ACCEPT.
 */
static
unsigned int nf_inet_post_routing(const struct nf_hook_ops *ops,
				  struct sk_buff *skb,
				  const struct net_device *in,
				  const struct net_device *out,
				  int (*okfn)(struct sk_buff *))
{
	const struct nf_nat_l3proto *l3nat;
	const struct nf_nat_l4proto *l4nat;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_tuple target;
	struct nf_conn *ct = NULL;
	struct iphdr *iph;
	struct nf_conn_nat *nat;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct) {
		pr_err("Error nf_ct_get\n");
		return NF_ACCEPT;
	}
	nat = nfct_nat(ct);
	if (!nat) {
		pr_err("Error nf_ct_get\n");
		return NF_ACCEPT;
	}

	/* No loopback tuple recorded for this flow: nothing to do. */
	if (!nat->help.loopback_tuple.dst.protonum)
		return NF_ACCEPT;

	iph = ip_hdr(skb);

	pr_debug("Post: Before src=%pI4 dst=%pI4 pkt_type=%d dev=%s ct=%px\n",
		 &(iph->saddr), &(iph->daddr), skb->pkt_type,
		 skb->dev->name, ct);

	nf_ct_invert_tuplepr(&target,
			     &nat->help.loopback_tuple);
	l3nat = __nf_nat_l3proto_find(target.src.l3num);
	l4nat = __nf_nat_l4proto_find(target.src.l3num,
				      target.dst.protonum);
	/* Update Source IP and port */
	if (!l3nat->manip_pkt(skb, 0, l4nat, &target, NF_NAT_MANIP_SRC))
		return NF_ACCEPT;

	/*
	 * manip_pkt() may have moved the packet data while making it
	 * writable; refresh the header pointer before printing from it.
	 */
	iph = ip_hdr(skb);

	pr_debug("Post: After src=%pI4 dst=%pI4 pkt_type=%d dev=%s ct=%px\n",
		 &(iph->saddr), &(iph->daddr), skb->pkt_type,
		 skb->dev->name, ct);

	return NF_ACCEPT;
}
#endif
/*
 * Netfilter hooks registered by this module (IPv4 only).  Which entries are
 * built depends on NEW_CT_4_LOOPBACK.
 */
static struct nf_hook_ops nf_ops[] __read_mostly = {
#if NEW_CT_4_LOOPBACK
	/* Single-hook variant: priority is one below NF_IP_PRI_CONNTRACK_CONFIRM. */
	{
		.pf       = NFPROTO_IPV4,
		.priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
		.hooknum  = NF_INET_LOCAL_IN,
		.hook     = nf_inet_local_in,
	},
#else
	/* Two-hook variant: PRE_ROUTING at the lowest priority value ... */
	{
		.pf       = NFPROTO_IPV4,
		.priority = INT_MIN,
		.hooknum  = NF_INET_PRE_ROUTING,
		.hook     = nf_inet_pre_routing,
		.owner    = THIS_MODULE,
	},
	/* ... and POST_ROUTING at the highest priority value. */
	{
		.pf       = NFPROTO_IPV4,
		.priority = INT_MAX,
		.hooknum  = NF_INET_POST_ROUTING,
		.hook     = nf_inet_post_routing,
		.owner    = THIS_MODULE,
	},
#endif
};

/*
 * init - module entry point.
 *
 * Registers every hook in nf_ops with the initial network namespace.
 *
 * Return: 0 on success, otherwise the error returned by
 * nf_register_net_hooks() (the module load then fails).
 */
static int __init init(void)
{
	int ret;

	ret = nf_register_net_hooks(&init_net, nf_ops, ARRAY_SIZE(nf_ops));
	if (ret) {
		/* Do not claim success when the hooks were not installed. */
		pr_err("NAT Loopback: failed to register hooks (%d)\n", ret);
		return ret;
	}

	pr_info("NAT Loopback Module Loaded\n");
	return 0;
}

/*
 * fini - module exit point.
 *
 * Removes the hooks in nf_ops from the initial network namespace and logs
 * the unload.
 */
static void __exit fini(void)
{
	const unsigned int hook_count = ARRAY_SIZE(nf_ops);

	nf_unregister_net_hooks(&init_net, nf_ops, hook_count);
	pr_info("NAT Loopback Exit\n");
}

/* Wire init()/fini() up as the module's load and unload handlers. */
module_init(init);
module_exit(fini);
/* Module metadata: description, author, license and an alternative name. */
MODULE_DESCRIPTION("NAT Loopback");
MODULE_AUTHOR("Jayesh Patel");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_nat_loopback");
