/****************************************************************************
 *
 * Broadcom Proprietary and Confidential.
 * (c) 2023 Broadcom. All rights reserved.
 * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
 *
 * Unless you and Broadcom execute a separate written software license
 * agreement governing use of this software, this software is licensed to
 * you under the terms of the GNU General Public License version 2 (the
 * "GPL"), available at [http://www.broadcom.com/licenses/GPLv2.php], with
 * the following added to such license:
 *
 * As a special exception, the copyright holders of this software give you
 * permission to link this software with independent modules, and to copy
 * and distribute the resulting executable under terms of your choice,
 * provided that you also meet, for each linked independent module, the
 * terms and conditions of the license of that module. An independent
 * module is a module which is not derived from this software. The special
 * exception does not apply to any modifications of the software.
 *
 * Notwithstanding the above, under no circumstances may you combine this
 * software in any way with any other Broadcom software provided under a
 * license other than the GPL, without Broadcom's express prior written
 * consent.
 *
 ****************************************************************************
 * Author: Peter Sulc <peter.sulc@broadcom.com>
 *****************************************************************************/

#include <linux/types.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/io.h>
#include <linux/bitops.h>

#include "dqm.h"
#include "dqm_dev.h"
#include "dqm_hw.h"

/* arm64 multiword burst instructions */

/*
 * Read up to @messages four-word messages from the location at @reg into
 * @data, moving each message with one ldp/stp pair of 64-bit registers
 * (16 bytes).
 *
 * The source address is never advanced: every message is fetched from the
 * same location. Reading stops early when both 64-bit halves of a message
 * are zero; that all-zero message has already been stored to @data but is
 * not counted.
 *
 * Returns the number of messages counted (0 if @messages is not positive).
 */
static inline int rx_4(u32 *reg, int messages, u32 *data)
{
	unsigned long long r1, r2;
	int num_read = 0;
	u32 *out = data;
	u32 *in = reg;

	while (messages > 0) {
		/*
		 * volatile: every pass must really re-read @reg instead of
		 * reusing the result of an earlier, textually identical asm.
		 * "=&r": r1/r2 are written by the ldp while the address of
		 * %[out] is still needed by the stp, so they must not be
		 * allocated to the register holding that address.
		 * "memory": the pair instructions touch 16 bytes, but the "Q"
		 * operands only describe the first u32 of each buffer.
		 */
		__asm__ __volatile__(
			"	ldp	%x[r1], %x[r2],	%[in]	;"
			"	stp	%x[r1], %x[r2],	%[out]	;"
			:	[out]	"=Q"(*out),
				[r1]	"=&r"(r1),
				[r2]	"=&r"(r2)
			:	[in]	"Q"(*in)
			:	"cc", "memory");
		if (!r1 && !r2) {
			break;
		}
		num_read++;
		messages--;
		out += 4;
	}
	return num_read;
}

/*
 * Read up to @messages two-word messages from the location at @reg into
 * @data, moving each message with one ldp/stp pair of 32-bit registers
 * (8 bytes).
 *
 * The source address is never advanced: every message is fetched from the
 * same location. Reading stops early when both words of a message are
 * zero; that all-zero message has already been stored to @data but is not
 * counted.
 *
 * Returns the number of messages counted (0 if @messages is not positive).
 */
static inline int rx_2(u32 *reg, int messages, u32 *data)
{
	/*
	 * 64-bit variables filled through their W-register views; a write to
	 * a W register zero-extends, so the whole-variable zero test below
	 * only looks at the loaded 32-bit words.
	 */
	unsigned long r1, r2;
	int num_read = 0;
	u32 *out = data;
	u32 *in = reg;

	while (messages > 0) {
		/*
		 * volatile: every pass must really re-read @reg instead of
		 * reusing the result of an earlier, textually identical asm.
		 * "=&r": r1/r2 are written by the ldp while the address of
		 * %[out] is still needed by the stp, so they must not be
		 * allocated to the register holding that address.
		 * "memory": the pair instructions touch 8 bytes, but the "Q"
		 * operands only describe the first u32 of each buffer.
		 */
		__asm__ __volatile__(
			"	ldp	%w[r1], %w[r2],	%[in]	;"
			"	stp	%w[r1], %w[r2],	%[out]	;"
			:	[out]	"=Q"(*out),
				[r1]	"=&r"(r1),
				[r2]	"=&r"(r2)
			:	[in]	"Q"(*in)
			:	"cc", "memory");
		if (!r1 && !r2) {
			break;
		}
		num_read++;
		messages--;
		out += 2;
	}
	return num_read;
}

/*
 * rx handler for queues with four-word messages: hands q->data, the
 * requested count and the destination buffer to rx_4() and returns its
 * result unchanged.
 */
static int dqm_arm64_rx_4(struct dqm *q, int messages, u32 *data)
{
	u32 *src = q->data;
	int received = rx_4(src, messages, data);

	return received;
}

/*
 * Receive @messages three-word messages from queue @q into @data.
 *
 * Each message is assembled from three 32-bit reads of q->data[0..2]; the
 * source pointer is not advanced between messages. Unlike rx_2()/rx_4()
 * there is no all-zero check, so the full count is always read.
 *
 * Returns the number of messages read (0 if @messages is not positive).
 */
static int dqm_arm64_rx_3(struct dqm *q, int messages, u32 *data)
{
	/*
	 * Read through a volatile-qualified pointer so the compiler issues
	 * exactly three loads per message, in order, and never reuses a value
	 * fetched for an earlier message.
	 * NOTE(review): assumes q->data is a hardware queue window - confirm.
	 */
	const volatile u32 *reg = q->data;
	int i;

	/* A negative count previously made the countdown loop run away. */
	if (messages <= 0)
		return 0;

	for (i = 0; i < messages; i++) {
		data[0] = reg[0];
		data[1] = reg[1];
		data[2] = reg[2];
		data += 3;
	}
	return messages;
}

/*
 * rx handler for queues with two-word messages: hands q->data, the
 * requested count and the destination buffer to rx_2() and returns its
 * result unchanged.
 */
static int dqm_arm64_rx_2(struct dqm *q, int messages, u32 *data)
{
	u32 *src = q->data;
	int received = rx_2(src, messages, data);

	return received;
}

/*
 * Receive @messages one-word messages from queue @q into @data.
 *
 * Every word is read from the same location, *q->data. There is no
 * all-zero check, so the full count is always read.
 *
 * Returns the number of messages read (0 if @messages is not positive).
 */
static int dqm_arm64_rx_1(struct dqm *q, int messages, u32 *data)
{
	/*
	 * Volatile-qualified so that each message costs exactly one load and
	 * the compiler cannot reuse the value read for an earlier message.
	 * NOTE(review): assumes q->data is a hardware queue window - confirm.
	 */
	const volatile u32 *reg = q->data;
	int i;

	/* A negative count previously made the countdown loop run away. */
	if (messages <= 0)
		return 0;

	for (i = 0; i < messages; i++)
		data[i] = *reg;
	return messages;
}

void dqm_set_rx(struct dqm *q)
{
	switch (q->msg_size) {
	case 1:
		q->rx = dqm_arm64_rx_1;
		break;
	case 2:
		q->rx = dqm_arm64_rx_2;
		break;
	case 3:
		q->rx = dqm_arm64_rx_3;
		break;
	case 4:
		q->rx = dqm_arm64_rx_4;
		break;
	default:
		q->rx = dqm_arm64_rx_4;
		pr_debug("%s undefined message size %d for dqm %s use 4\n",
			 __func__, q->msg_size, q->name);
		break;
	}
	pr_debug("%s dqm q %32s num %d message size %d\n",
		 __func__, q->name, q->num, q->msg_size);
}

/*
 * Copy one four-word message from @data to the location at @reg using one
 * ldp/stp pair of 64-bit registers (16 bytes).
 */
static inline void tx_4(u32 *reg, u32 *data)
{
	unsigned long long r1, r2;

	/*
	 * volatile: the store to @reg is the whole point of this asm and must
	 * be emitted once per call, even when the same address is written
	 * again straight afterwards.
	 * "=&r": r1/r2 are written by the ldp while the address of %[reg] is
	 * still needed by the stp, so they must not be allocated to the
	 * register holding that address.
	 * "memory": the pair instructions touch 16 bytes, but the "Q"
	 * operands only describe the first u32 of each buffer.
	 */
	__asm__ __volatile__(
		"	ldp	%x[r1], %x[r2],	%[data]	;"
		"	stp	%x[r1], %x[r2],	%[reg]	;"
		:	[reg]	"=Q"(*reg),
			[r1]	"=&r"(r1),
			[r2]	"=&r"(r2)
		:	[data]	"Q"(*data)
		:	"cc", "memory");
}

/*
 * Copy one two-word message from @data to the location at @reg as one
 * 64-bit access: @data is read as a u64 and written to @reg as a u64.
 *
 * The store is volatile-qualified so the compiler cannot drop it or merge
 * it with a later store to the same address.
 * NOTE(review): assumes @reg is a hardware queue window - confirm.
 */
static inline void tx_2(u32 *reg, u32 *data)
{
	*(volatile u64 *)reg = *(const u64 *)data;
}

/*
 * Send @messages four-word messages from @data to queue @q, one tx_4()
 * call per message. Always returns @messages.
 */
static int dqm_arm64_tx_4(struct dqm *q, int messages, u32 *data)
{
	u32 *msg = data;
	int left;

	for (left = messages; left > 0; left--, msg += 4)
		tx_4(q->data, msg);

	return messages;
}

/*
 * Send @messages three-word messages from @data to queue @q.
 *
 * Words 0 and 1 of each message go out through tx_2(); word 2 is then
 * written to q->data[2]. Always returns @messages.
 *
 * NOTE(review): @data advances by three words per message, so from the
 * second message on tx_2() is handed a pointer that is only guaranteed to
 * be 4-byte aligned, which it reads as a u64 - confirm this is acceptable.
 */
static int dqm_arm64_tx_3(struct dqm *q, int messages, u32 *data)
{
	int i;

	for (i = 0; i < messages; i++) {
		volatile u32 *reg = q->data;

		tx_2(q->data, data);
		/*
		 * Volatile store: exactly one write of the third word per
		 * message, never elided or merged with the next message's.
		 * NOTE(review): assumes q->data is a hardware queue window.
		 */
		reg[2] = data[2];
		data += 3;
	}
	return messages;
}

/*
 * Send @messages two-word messages from @data to queue @q, one tx_2()
 * call per message. Always returns @messages.
 */
static int dqm_arm64_tx_2(struct dqm *q, int messages, u32 *data)
{
	u32 *msg = data;
	int sent = 0;

	while (sent < messages) {
		tx_2(q->data, msg);
		msg += 2;
		sent++;
	}
	return messages;
}

/*
 * Send @messages one-word messages from @data to queue @q.
 *
 * Every word is written to the same location, *q->data. Always returns
 * @messages.
 */
static int dqm_arm64_tx_1(struct dqm *q, int messages, u32 *data)
{
	/*
	 * Volatile-qualified: repeated stores to one address are otherwise
	 * candidates for dead-store elimination, which would lose messages.
	 * NOTE(review): assumes q->data is a hardware queue window - confirm.
	 */
	volatile u32 *reg = q->data;
	int i;

	for (i = 0; i < messages; i++)
		*reg = data[i];
	return messages;
}

void dqm_set_tx(struct dqm *q)
{
	switch (q->msg_size) {
	case 1:
		q->tx = dqm_arm64_tx_1;
		break;
	case 2:
		q->tx = dqm_arm64_tx_2;
		break;
	case 3:
		q->tx = dqm_arm64_tx_3;
		break;
	case 4:
		q->tx = dqm_arm64_tx_4;
		break;
	default:
		q->tx = dqm_arm64_tx_1;
		pr_debug("%s undefined message size %d for dqm %s use 1\n",
			 __func__, q->msg_size, q->name);
		break;
	}
	pr_debug("%s dqm q %32s num %d message size %d\n",
		 __func__, q->name, q->num, q->msg_size);
}
