 /****************************************************************************
 *
 * Copyright (c) 2015 Broadcom Corporation
 *
 * Unless you and Broadcom execute a separate written software license
 * agreement governing use of this software, this software is licensed to
 * you under the terms of the GNU General Public License version 2 (the
 * "GPL"), available at [http://www.broadcom.com/licenses/GPLv2.php], with
 * the following added to such license:
 *
 * As a special exception, the copyright holders of this software give you
 * permission to link this software with independent modules, and to copy
 * and distribute the resulting executable under terms of your choice,
 * provided that you also meet, for each linked independent module, the
 * terms and conditions of the license of that module. An independent
 * module is a module which is not derived from this software. The special
 * exception does not apply to any modifications of the software.
 *
 * Notwithstanding the above, under no circumstances may you combine this
 * software in any way with any other Broadcom software provided under a
 * license other than the GPL, without Broadcom's express prior written
 * consent.
 *
 ****************************************************************************
 * Author: Tim Ross <tross@broadcom.com>
 *****************************************************************************/
#include <linux/types.h>
#include <linux/platform_device.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/irqreturn.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/io.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include "dqm.h"
#include "fpm.h"

/*
 * To use this debug capture, the registers that dqmt_init() ioremaps below
 * must also be described in the device tree (DT) so that the hypervisor
 * allows them to be mapped and accessed.
 */
/* Non-zero compiles in the debug capture code guarded by #if below. */
#define CAPTURE_DQM_DEBUG_DATA	0

/* Value loaded into 'iter' by dqmt_init(). */
#define DEFAULT_TX_ITERATIONS	(10*1000*1000)
/* Per-message size in bytes assumed when the stats printouts compute Mbps. */
#define ASSUMED_PACKET_SIZE	(1500)
/* Number of entries in the qs[] table. */
#define MAX_QS			(32)
/* Pattern the TX thread puts in message word 0 and the RX thread expects. */
#define WORD0_DATA		(0x071267)
/* Pattern the TX thread puts in message word 1 and the RX thread expects. */
#define WORD1_DATA		(0xcafebabe)

/* Upper bound of each TX thread's send loop; written by the "iter" proc file. */
unsigned long iter;
/* When true, the RX and TX threads print throughput statistics. */
bool show_stats;

#if CAPTURE_DQM_DEBUG_DATA
/*
 * Layout of the debug capture area that dqmt_init() ioremaps into 'stats'.
 * The push_* / pre_push_* / post_push_* fields are written by the TX thread
 * around dqm_tx(); the pop_* fields are written by the RX thread after each
 * message it receives.
 */
struct _stats {
	/* Running counts of successful pushes (TX) and pops (RX). */
	u32 push_count;
	u32 pop_count;
	/* Head/tail values extracted from the q28_size register. */
	u32 push_head;
	u32 push_tail;
	u32 pop_head;
	u32 pop_tail;
	/* Copies of the q28 push token registers taken before dqm_tx(). */
	u32 pre_push_push_tok;
	u32 pre_push_push_tok_next;
	/* Copies of the q28 push token registers taken after dqm_tx(). */
	u32 post_push_push_tok;
	u32 post_push_push_tok_next;
	/* Copies of the q28 pop token registers taken after dqm_tx(). */
	u32 push_pop_tok;
	u32 push_pop_tok_next;
	/* Copies of the q28 push/pop token registers taken after a pop. */
	u32 pop_push_tok;
	u32 pop_push_tok_next;
	u32 pop_pop_tok;
	u32 pop_pop_tok_next;
	/* fpm_get_pool_stats(0, ...) results copied after a push / a pop. */
	struct fpm_pool_stats push_fpm;
	struct fpm_pool_stats pop_fpm;
} *stats;
/*
 * Register read to obtain the packed head/tail value.
 * NOTE(review): the "q28" prefix presumably refers to DQM queue 28 - confirm.
 */
u32 *q28_size;
/* Masks/shifts used to split the q28_size value into head and tail. */
#define HEAD_PTR_MASK	0xfffc0000
#define HEAD_PTR_SHIFT	18
#define TAIL_PTR_MASK	0x0003fff0
#define TAIL_PTR_SHIFT	4
/* Token registers ioremapped in dqmt_init() and sampled by both threads. */
u32 *q28_push_tok;
u32 *q28_push_tok_next;
u32 *q28_pop_tok;
u32 *q28_pop_tok_next;
#endif

/* Per-queue test state; one entry of the qs[] table. */
struct q_info {
	/* Handle returned by dqm_register(); NULL marks the slot as free. */
	void *q_h;
	/* DQM number written by the user to the "tx" or "rx" proc file. */
	int qnum;
	/* Filled in by dqm_register() and passed to dqm_tx()/dqm_rx(). */
	int msgsz;
	/* jiffies value captured when the test on this queue started. */
	u32 start_time;
	/* Elapsed whole seconds at the time of the last statistics printout. */
	u32 report_time;
	/* Messages successfully sent (TX) or received and verified (RX). */
	u32 msg_cnt;
	/* Sends for which dqm_tx() returned -EAGAIN. */
	u32 drop_cnt;
	/* Thread started by a write to the "tx" proc file. */
	struct task_struct *tx_thread;
};
/* Table of queue slots handed out by alloc_q(). */
struct q_info qs[MAX_QS];

/* Set by dqmt_exit() to make the RX thread leave its main loop. */
bool kill_rx_thread;
/* The single RX worker thread started by dqmt_init(). */
struct task_struct *rx_thread;
/* Wait queue the RX thread sleeps on; woken by dqmt_rx_isr(). */
struct wait_queue_head rx_wait_q;
/* List of q_item entries queued by dqmt_rx_isr() for the RX thread. */
struct list_head rx_qs;
/* Protects rx_qs. */
spinlock_t rx_qs_lock;
/* One pending "queue has data" notification on the rx_qs list. */
struct q_item {
	struct list_head q_list;
	/* Queue the notification refers to. */
	struct q_info *q;
};

struct q_info *alloc_q(void) {
	int i;

	for (i = 0; i < MAX_QS; i++) {
		if (qs[i].q_h == NULL)
			break;
	}
	if (i < MAX_QS) {
		return &qs[i];
	} else
		return NULL;
}

/*
 * Return a slot to the free pool.
 *
 * Clearing q_h is what alloc_q() and find_q_by_num() test to decide whether
 * a slot is in use; the other fields are left untouched.
 */
void free_q(struct q_info *q)
{
	q->q_h = NULL;
}

struct q_info *find_q_by_num(int qnum) {
	int i;

	for (i = 0; i < MAX_QS; i++) {
		if (qs[i].qnum == qnum && qs[i].q_h)
			break;
	}
	if (i < MAX_QS)
		return &qs[i];
	else
		return NULL;
}

/*
 * RX callback registered with dqm_register() by dqmt_rx().
 *
 * Disables further RX callbacks for the queue, appends a q_item for it to
 * rx_qs and wakes the RX thread, which drains the queue and re-enables the
 * callback.
 *
 * q_h:   DQM handle the callback is for
 * ctx:   the struct q_info supplied as cb.context at registration
 * flags: not used
 *
 * returns IRQ_HANDLED in all cases
 */
irqreturn_t dqmt_rx_isr(void *q_h, void *ctx, u32 flags)
{
	struct q_item *entry;
	unsigned long irq_state;

	pr_debug("-->\n");

	dqm_disable_rx_cb(q_h);

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry) {
		entry->q = ctx;

		spin_lock_irqsave(&rx_qs_lock, irq_state);
		list_add_tail(&entry->q_list, &rx_qs);
		spin_unlock_irqrestore(&rx_qs_lock, irq_state);

		wake_up(&rx_wait_q);
	} else {
		/*
		 * Nothing is queued, so the RX thread will not re-enable the
		 * callback that was disabled above.
		 */
		pr_err("Couldn't allocate q_item.\n");
	}

	pr_debug("<--\n");
	return IRQ_HANDLED;
}

int dqmt_rx_thread(void *ctx)
{
	u32 msgdata[DQM_MAX_MSGSZ];
	int rx_time;
	struct q_info *q;
	DECLARE_WAITQUEUE(wait, current);
	struct q_item *item;
	unsigned long flags;
#if CAPTURE_DQM_DEBUG_DATA
	u32 head_tail;
	struct fpm_pool_stats pool_stats;
	u32 pop_count;
#endif

	pr_debug("-->\n");

	set_current_state(TASK_INTERRUPTIBLE);

	while (!kill_rx_thread) {
		add_wait_queue(&rx_wait_q, &wait);

		pr_debug("sleeping\n");
		if (list_empty(&rx_qs))
			schedule();
		else
			set_current_state(TASK_RUNNING);

		pr_debug("woke up\n");
		remove_wait_queue(&rx_wait_q, &wait);

		spin_lock_irqsave(&rx_qs_lock, flags);
		while (!list_empty(&rx_qs)) {
			pr_debug("pulling item from rx_q\n");
			item = list_entry(rx_qs.next, struct q_item, q_list);
			q = item->q;
			list_del(rx_qs.next);
			kfree(item);
			spin_unlock_irqrestore(&rx_qs_lock, flags);

			pr_debug("processing DQM %d\n", q->qnum);
			if (!q->start_time)
				q->start_time = jiffies;

			while (dqm_rx(q->q_h, 1, q->msgsz, msgdata)) {
				if (unlikely(msgdata[0] != WORD0_DATA)) {
					pr_err("Q #%d data mismatch! ", q->qnum);
					pr_err("word0: 0x%08x received, should be 0x%08x\n",
					       msgdata[0], WORD0_DATA);
					goto err_q_data;
				}
				if (unlikely(msgdata[1] != WORD1_DATA)) {
					pr_err("Q #%d data mismatch! ", q->qnum);
					pr_err("word0: 0x%08x received, should be 0x%08x\n",
					       msgdata[1], WORD1_DATA);
					goto err_q_data;
				}
				q->msg_cnt++;
#if CAPTURE_DQM_DEBUG_DATA
				pop_count = readl(&stats->pop_count);
				writel(++pop_count, &stats->pop_count);
				head_tail = readl(q28_size);
				writel((head_tail & HEAD_PTR_MASK) >> HEAD_PTR_SHIFT, &stats->pop_head);
				writel((head_tail & TAIL_PTR_MASK) >> TAIL_PTR_SHIFT, &stats->pop_tail);
				writel(readl(q28_push_tok), &stats->pop_push_tok);
				writel(readl(q28_push_tok_next), &stats->pop_push_tok_next);
				writel(readl(q28_pop_tok), &stats->pop_pop_tok);
				writel(readl(q28_pop_tok_next), &stats->pop_pop_tok_next);
				fpm_get_pool_stats(0, &pool_stats);
				writel(pool_stats.overflow_count, &stats->pop_fpm.overflow_count);
				writel(pool_stats.underflow_count, &stats->pop_fpm.underflow_count);
				writel(pool_stats.tok_avail, &stats->pop_fpm.tok_avail);
				writel(pool_stats.alloc_fifo_empty, &stats->pop_fpm.alloc_fifo_empty);
				writel(pool_stats.alloc_fifo_full, &stats->pop_fpm.alloc_fifo_full);
				writel(pool_stats.free_fifo_empty, &stats->pop_fpm.free_fifo_empty);
				writel(pool_stats.free_fifo_full, &stats->pop_fpm.free_fifo_full);
				writel(pool_stats.pool_full, &stats->pop_fpm.pool_full);
				writel(pool_stats.invalid_tok_frees, &stats->pop_fpm.invalid_tok_frees);
				writel(pool_stats.invalid_tok_multi, &stats->pop_fpm.invalid_tok_multi);
				writel(pool_stats.mem_corrupt_tok, &stats->pop_fpm.mem_corrupt_tok);
				writel(pool_stats.mem_corrupt_tok_valid, &stats->pop_fpm.mem_corrupt_tok_valid);
				writel(pool_stats.invalid_free_tok, &stats->pop_fpm.invalid_free_tok);
				writel(pool_stats.invalid_free_tok_valid, &stats->pop_fpm.invalid_free_tok_valid);
				writel(pool_stats.invalid_mcast_tok, &stats->pop_fpm.invalid_mcast_tok);
				writel(pool_stats.invalid_mcast_tok_valid, &stats->pop_fpm.invalid_mcast_tok_valid);
#endif
			}
			if (show_stats) {
				rx_time = (jiffies - q->start_time) / HZ;
				if (rx_time > q->report_time) {
					q->report_time = rx_time;
					printk("Q #%d: %d msgs RX'd (%d msgs/sec, %d Mbps)\n",
					       q->qnum, q->msg_cnt, q->msg_cnt/q->report_time,
					       (((q->msg_cnt/q->report_time) >> 10) * ASSUMED_PACKET_SIZE * 8) >> 10);
				}
			}
			dqm_enable_rx_cb(q->q_h);
err_q_data:
			spin_lock_irqsave(&rx_qs_lock, flags);
		}
		spin_unlock_irqrestore(&rx_qs_lock, flags);
		set_current_state(TASK_INTERRUPTIBLE);
	}

	set_current_state(TASK_RUNNING);
	pr_debug("<--\n");
	return 0;
}

int dqmt_tx_thread(void *ctx)
{
	int status = 0;
	int i;
	u32 msgdata[DQM_MAX_MSGSZ];
	u8 msg_size;
	struct q_info *q = ctx;
	int tx_time;
#if CAPTURE_DQM_DEBUG_DATA
	u32 head_tail;
	struct fpm_pool_stats pool_stats;
	u32 push_count;
#endif

	pr_debug("-->\n");

	q->q_h = dqm_register("cpucomm", q->qnum, NULL, &q->msgsz, DQM_F_TX);
	if (!q->q_h) {
		pr_err("%s: Failed to register DQM\n", __FUNCTION__);
		status = -1;
		goto err_free_q;
	}

	msgdata[0] = WORD0_DATA;
	msgdata[1] = WORD1_DATA;
	q->msg_cnt = 0;
	q->drop_cnt = 0;
	q->report_time = 0;
	q->start_time = jiffies;
	for (i = 0; i < iter; i++) {
		pr_debug("%s: sending msg to DQM %d.\n", __func__, q->qnum);
#if CAPTURE_DQM_DEBUG_DATA
		writel(readl(q28_push_tok), &stats->pre_push_push_tok);
		writel(readl(q28_push_tok_next), &stats->pre_push_push_tok_next);
#endif
		status = dqm_tx(q->q_h, 1, q->msgsz, msgdata);
		if (!status) {
			q->msg_cnt++;
#if CAPTURE_DQM_DEBUG_DATA
			push_count = readl(&stats->push_count);
			writel(++push_count, &stats->push_count);
			head_tail = readl(q28_size);
			writel((head_tail & HEAD_PTR_MASK) >> HEAD_PTR_SHIFT, &stats->push_head);
			writel((head_tail & TAIL_PTR_MASK) >> TAIL_PTR_SHIFT, &stats->push_tail);
			writel(readl(q28_push_tok), &stats->post_push_push_tok);
			writel(readl(q28_push_tok_next), &stats->post_push_push_tok_next);
			writel(readl(q28_pop_tok), &stats->push_pop_tok);
			writel(readl(q28_pop_tok_next), &stats->push_pop_tok_next);
			fpm_get_pool_stats(0, &pool_stats);
			writel(pool_stats.overflow_count, &stats->push_fpm.overflow_count);
			writel(pool_stats.underflow_count, &stats->push_fpm.underflow_count);
			writel(pool_stats.tok_avail, &stats->push_fpm.tok_avail);
			writel(pool_stats.alloc_fifo_empty, &stats->push_fpm.alloc_fifo_empty);
			writel(pool_stats.alloc_fifo_full, &stats->push_fpm.alloc_fifo_full);
			writel(pool_stats.free_fifo_empty, &stats->push_fpm.free_fifo_empty);
			writel(pool_stats.free_fifo_full, &stats->push_fpm.free_fifo_full);
			writel(pool_stats.pool_full, &stats->push_fpm.pool_full);
			writel(pool_stats.invalid_tok_frees, &stats->push_fpm.invalid_tok_frees);
			writel(pool_stats.invalid_tok_multi, &stats->push_fpm.invalid_tok_multi);
			writel(pool_stats.mem_corrupt_tok, &stats->push_fpm.mem_corrupt_tok);
			writel(pool_stats.mem_corrupt_tok_valid, &stats->push_fpm.mem_corrupt_tok_valid);
			writel(pool_stats.invalid_free_tok, &stats->push_fpm.invalid_free_tok);
			writel(pool_stats.invalid_free_tok_valid, &stats->push_fpm.invalid_free_tok_valid);
			writel(pool_stats.invalid_mcast_tok, &stats->push_fpm.invalid_mcast_tok);
			writel(pool_stats.invalid_mcast_tok_valid, &stats->push_fpm.invalid_mcast_tok_valid);
#endif
		} else if (status == -EAGAIN)
			q->drop_cnt++;
		else
			pr_err("%s: Failure sending msg to DQM %d.\n", __func__, q->qnum);
		if (show_stats) {
			tx_time = (jiffies - q->start_time) / HZ;
			if (tx_time > q->report_time)
			{
				q->report_time = tx_time;
				printk("Q #%d: %d msgs TX'd (%d msgs/sec, %d Mbps), %d msgs dropped (%d msgs/sec, %d Mbps)\n",
					   q->qnum, q->msg_cnt, q->msg_cnt/q->report_time,
					   (((q->msg_cnt/q->report_time) >> 10) * ASSUMED_PACKET_SIZE * 8) >> 10,
					   q->drop_cnt, q->drop_cnt/q->report_time,
					   (((q->drop_cnt/q->report_time) >> 10) * ASSUMED_PACKET_SIZE * 8) >> 10);
			}
		}
		schedule();
	}
	status = dqm_release(q->q_h, DQM_F_TX);
	if (status)
		pr_err("%s: Failure releasing DQM %d.\n", __func__, q->qnum);

err_free_q:
	free_q(q);

	pr_debug("<--\n");
	return status;
}

/*
 * Write handler for /proc/driver/dqmt/show_stats.
 *
 * Parses an integer from the user buffer; a non-zero value turns the
 * statistics printouts on and zero turns them off. If the input cannot be
 * parsed the error is logged and show_stats keeps its previous value.
 *
 * returns count in all cases, matching the other dqmt proc write handlers
 */
static ssize_t
dqmt_show_stats(struct file *file, const char __user *buffer,
	  size_t count, loff_t *data)
{
	long enable;

	pr_debug("-->\n");
	if (kstrtol_from_user(buffer, count, 0, &enable)) {
		pr_err("%s: Error in kstrtol_from_user\n", __func__);
		/* 'enable' holds no parsed value here, so it must not be used. */
		goto done;
	}
	show_stats = (enable != 0);

done:
	pr_debug("<--\n");

	return count;
}

/*
 * Write handler for /proc/driver/dqmt/iter.
 *
 * Parses an unsigned integer from the user buffer into 'iter', the number of
 * sends each TX thread performs. 'iter' is an unsigned long, so the unsigned
 * parser is used; input that does not parse as an unsigned value is logged
 * and leaves 'iter' unchanged.
 *
 * returns count in all cases, matching the other dqmt proc write handlers
 */
static ssize_t
dqmt_iter(struct file *file, const char __user *buffer,
	  size_t count, loff_t *data)
{
	pr_debug("-->\n");
	if (kstrtoul_from_user(buffer, count, 0, &iter))
		pr_err("%s: Error in kstrtoul_from_user\n", __func__);
	pr_debug("<--\n");

	return count;
}

/*
 * Write handler for /proc/driver/dqmt/tx.
 *
 * Parses a DQM number from the user buffer, picks a free slot in qs[] and
 * starts a "dqmt_tx<qnum>" thread running dqmt_tx_thread() on that slot.
 *
 * NOTE(review): alloc_q() does not reserve the slot; it only counts as in
 * use once the new thread stores its DQM handle. Two writes in quick
 * succession could therefore be given the same slot - confirm whether
 * callers need protection against that.
 *
 * returns count in all cases; failures are only logged
 */
static ssize_t
dqmt_tx(struct file *file, const char __user *buffer,
	size_t count, loff_t *data)
{
	long qnum;
	struct q_info *q;
	struct task_struct *thread;

	pr_debug("-->\n");

	if (kstrtol_from_user(buffer, count, 0, &qnum)) {
		pr_err("%s: Error in kstrtol_from_user\n", __func__);
		return count;
	}
	q = alloc_q();
	if (!q) {
		pr_err("%s: Max open Q's exceeded.\n", __FUNCTION__);
		goto done;
	}
	q->qnum = qnum;
	thread = kthread_run(dqmt_tx_thread, (void *)q, "dqmt_tx%d", q->qnum);
	/* kthread_run() reports failure with an ERR_PTR() value, never NULL. */
	if (IS_ERR(thread)) {
		pr_err("%s: Failed to create thread dqmt_tx%d.\n", __func__, q->qnum);
		/* Do not leave an error-encoded pointer behind in the slot. */
		thread = NULL;
	}
	q->tx_thread = thread;

done:
	pr_debug("<--\n");
	return count;
}

/*
 * Write handler for /proc/driver/dqmt/rx.
 *
 * Parses a signed integer N from the user buffer.
 *   N >= 0: take a free slot, register DQM N for receive with dqmt_rx_isr()
 *           as callback and enable the callback.
 *   N <  0: look up the in-use slot for DQM -N, disable its callback,
 *           release the DQM and free the slot.
 * Because 0 is treated as "register", DQM 0 cannot be closed through this
 * file.
 *
 * returns count in all cases; failures are only logged
 */
static ssize_t
dqmt_rx(struct file *file, const char __user *buffer,
	size_t count, loff_t *data)
{
	int status = 0;
	long qnum;
	struct dqm_cb cb = {};
	struct q_info *q;

	pr_debug("-->\n");

	if (kstrtol_from_user(buffer, count, 0, &qnum)) {
		pr_err("%s: Error in kstrtol_from_user\n", __func__);
		return count;
	}
	/*
	 * The number is stored in an int and may be negated below, so reject
	 * anything whose value or negation does not fit in an int.
	 */
	if (qnum > INT_MAX || qnum < -INT_MAX) {
		pr_err("%s: Q number %ld out of range.\n", __func__, qnum);
		return count;
	}
	if (qnum >= 0) {
		q = alloc_q();
		if (!q) {
			pr_err("%s: Max open Q's exceeded.\n", __FUNCTION__);
			goto done;
		}
		q->qnum = qnum;

		cb.fn = dqmt_rx_isr;
		cb.context = q;
		/* Storing the handle is what marks the slot as in use. */
		q->q_h = dqm_register("cpucomm", q->qnum, &cb, &q->msgsz,
				      DQM_F_RX);
		if (!q->q_h) {
			pr_err("%s: Failed to register DQM\n", __FUNCTION__);
			goto err_free_q;
		}

		q->msg_cnt = 0;
		q->drop_cnt = 0;
		q->report_time = 0;
		/* 0 tells the RX thread to stamp the time of the first message. */
		q->start_time = 0;
		status = dqm_enable_rx_cb(q->q_h);
		if (status) {
			pr_err("%s: Failure enabling interrupts on DQM %d.\n", __func__, q->qnum);
			goto err_unregister_q;
		}
	} else {
		qnum = -qnum;
		q = find_q_by_num(qnum);
		if (!q) {
			pr_err("%s: Unable to find Q #%ld.\n", __FUNCTION__, qnum);
			goto done;
		}
		status = dqm_disable_rx_cb(q->q_h);
		if (status)
			pr_err("%s: Failure disabling interrupts on DQM %d.\n", __func__, q->qnum);
		status |= dqm_release(q->q_h, DQM_F_RX);
		if (status)
			pr_err("%s: Failure releasing DQM %d.\n", __func__, q->qnum);
		free_q(q);
	}
	goto done;

err_unregister_q:
	dqm_release(q->q_h, DQM_F_RX);

err_free_q:
	free_q(q);

done:
	pr_debug("<--\n");
	return count;
}

/*
 * File operations for the entries under /proc/driver/dqmt. Each table only
 * provides a write handler; no read handler is supplied.
 */

/* /proc/driver/dqmt/tx: start a TX test on the DQM number written. */
static const struct file_operations dqmt_proc_tx_ops = {
	.owner  = THIS_MODULE,
	.write = dqmt_tx,
};

/* /proc/driver/dqmt/rx: open (N >= 0) or close (N < 0) an RX queue. */
static const struct file_operations dqmt_proc_rx_ops = {
	.owner  = THIS_MODULE,
	.write = dqmt_rx,
};

/* /proc/driver/dqmt/iter: set the number of sends per TX thread. */
static const struct file_operations dqmt_proc_iter_ops = {
	.owner  = THIS_MODULE,
	.write = dqmt_iter,
};

/* /proc/driver/dqmt/show_stats: turn statistics printouts on or off. */
static const struct file_operations dqmt_proc_show_stats_ops = {
	.owner  = THIS_MODULE,
	.write = dqmt_show_stats,
};

/*
 * Module init.
 *
 * returns success (0) or failure (< 0)
 */
__init int dqmt_init(void)
{
	int status = 0;
	struct proc_dir_entry *entry;
	int i;
	char thread_name[32];
#if CAPTURE_DQM_DEBUG_DATA
	u32 *qsm;
#endif

	pr_debug("-->\n");

#if CAPTURE_DQM_DEBUG_DATA
	stats = ioremap(0xd0a40000, 0xc000);
	for (qsm = (u32 *)stats, i = 0; i < 0xc000 >> 2; qsm++, i++)
		writel(0, qsm);
	q28_size = ioremap(0xd3808380, 4);
	q28_push_tok = ioremap(0xd3808390, 4);
	q28_push_tok_next = ioremap(0xd3808394, 4);
	q28_pop_tok = ioremap(0xd3808398, 4);
	q28_pop_tok_next = ioremap(0xd380839c, 4);
#endif

	iter = DEFAULT_TX_ITERATIONS;
	memset(qs, 0, sizeof(qs));
	for (i = 0; i < MAX_QS; i++)
		qs[i].q_h = NULL;

	init_waitqueue_head(&rx_wait_q);
	INIT_LIST_HEAD(&rx_qs);
	spin_lock_init(&rx_qs_lock);

	snprintf(thread_name, sizeof(thread_name), "dqmt_rx");
	thread_name[sizeof(thread_name)-1] = '\0';
	rx_thread = kthread_run(dqmt_rx_thread, NULL, thread_name);
	if (!rx_thread) {
		pr_err("%s: Failed to create thread %s.\n", __func__, thread_name);
		status = -1;
		goto done;
	}

	entry = proc_mkdir("driver/dqmt", NULL);
	if (!entry) {
		pr_err("Failed to create /proc/driver/dqmt.\n");
		status = -1;
		goto done;
	}
	entry = proc_create("driver/dqmt/tx", 0644, NULL, &dqmt_proc_tx_ops);
	if (!entry) {
		pr_err("Failed to create /proc/driver/dqmt/tx.\n");
		status = -1;
		goto done;
	}

	entry = proc_create("driver/dqmt/rx", 0644, NULL, &dqmt_proc_rx_ops);
	if (!entry) {
		pr_err("Failed to create /proc/driver/dqmt/rx.\n");
		status = -1;
		goto done;
	}

	entry = proc_create("driver/dqmt/iter", 0644, NULL, &dqmt_proc_iter_ops);
	if (!entry) {
		pr_err("Failed to create /proc/driver/dqmt/iter.\n");
		status = -1;
		goto done;
	}

	entry = proc_create("driver/dqmt/show_stats", 0644, NULL, &dqmt_proc_show_stats_ops);
	if (!entry) {
		pr_err("Failed to create /proc/driver/dqmt/show_stats.\n");
		status = -1;
		goto done;
	}

done:
	pr_debug("<--\n");
	return status;
}

/*
 * Module exit.
 */
void dqmt_exit(void)
{
	int i;
	struct q_info *q;

	pr_debug("-->\n");

	kill_rx_thread = true;
	wake_up(&rx_wait_q);
	for (i = 0; i < MAX_QS; i++) {
		q = &qs[i];
		if (q->q_h != NULL) {
			dqm_disable_rx_cb(q->q_h);
			dqm_disable_tx_cb(q->q_h);
			dqm_release(q->q_h, DQM_F_TX | DQM_F_RX);
			free_q(q);
		}
	}
	remove_proc_entry("driver/dqmt/show_stats", NULL);
	remove_proc_entry("driver/dqmt/iter", NULL);
	remove_proc_entry("driver/dqmt/rx", NULL);
	remove_proc_entry("driver/dqmt/tx", NULL);
	remove_proc_entry("driver/dqmt", NULL);

	pr_debug("<--\n");
}

/* Register the load/unload entry points and declare the module license. */
module_init(dqmt_init);
module_exit(dqmt_exit);
MODULE_LICENSE("GPL v2");
