/*
 * Copyright (C) 2019 sysmocom - s.f.m.c. GmbH
 * All Rights Reserved
 *
 * SPDX-License-Identifier: AGPL-3.0+
 *
 * Author: Pau Espin Pedrol <pespin@sysmocom.de>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * See the COPYING file in the main directory for details.
 */

/*
 * rate_ctr API uses several osmocom select loop features, and as a result,
 * calls to it must be done through the main thread (the one running the osmocom
 * loop in osmo-trx).
 * Since read/write from/to SDR is done in separate threads (even read and write
 * each use a different thread), we must use some sort of message passing system
 * between main thread feeding rate_ctr structures and the Rx/Tx threads
 * generating the events.
 * The idea is that upon read/write issues, lower layers (SDR APIs) provide us with
 * underrun/overrun/droppedPackets information, and in that case we pass that up
 * the stack through signal <SS_DEVICE,S_DEVICE_COUNTER_CHANGE> with signal_cb
 * being a pointer to a "struct device_counters" structure, which contains
 * device (implementation agnostic) statful counters for different kind of
 * statistics.
 * That signal is processed here in device_sig_cb, where a copy of the "struct
 * device_counters" structure is held and the main thread is instructed through
 * a timerfd to update rate_ctr APIs against this copy. All this is done inside
 * a mutex to avoid different race conditions (between Rx andTx threads, and
 * between Rx/Tx and main thread). For the same reason, callers of signal
 * <SS_DEVICE,S_DEVICE_COUNTER_CHANGE> (device_sig_cb), that is Rx/Tx threads,
 * must do so with PTHREAD_CANCEL_DISABLE, in order to avoid possible deadlocks
 * in case the main thread decides to cancel other threads due to a shutdown
 * operation (fi SIGKILL received)
 */

#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <netinet/in.h>
#include <arpa/inet.h>

extern "C" {
#include <osmocom/core/talloc.h>
#include <osmocom/core/utils.h>
#include <osmocom/core/rate_ctr.h>
#include <osmocom/core/select.h>
#include <osmocom/core/stats.h>
#include <osmocom/core/timer.h>

#include "osmo_signal.h"
#include "trx_vty.h"
#include "trx_rate_ctr.h"
}
#include "Threads.h"
#include "Logger.h"

/* Used in dev_ctrs_pending, when set it means that channel slot contains unused
   (non-pending) counter data */
#define PENDING_CHAN_NONE SIZE_MAX

static void *trx_rate_ctr_ctx;

static struct rate_ctr_group** rate_ctrs;
static struct device_counters* dev_ctrs_pending;
static struct trx_counters* trx_ctrs_pending;
static size_t chan_len;
static struct osmo_fd dev_rate_ctr_timerfd;
static struct osmo_fd trx_rate_ctr_timerfd;
static Mutex dev_rate_ctr_mutex;
static Mutex trx_rate_ctr_mutex;

struct osmo_timer_list threshold_timer;
static LLIST_HEAD(threshold_list);
static unsigned int threshold_timer_sched_secs;
static bool threshold_initied;

const struct value_string rate_ctr_intv[] = {
	{ RATE_CTR_INTV_SEC,	"per-second" },
	{ RATE_CTR_INTV_MIN,	"per-minute" },
	{ RATE_CTR_INTV_HOUR,	"per-hour" },
	{ RATE_CTR_INTV_DAY, 	"per-day" },
	{ 0, NULL }
};

const struct value_string trx_chan_ctr_names[] = {
	{ TRX_CTR_DEV_RX_OVERRUNS,	"rx_overruns" },
	{ TRX_CTR_DEV_TX_UNDERRUNS,	"tx_underruns" },
	{ TRX_CTR_DEV_RX_DROP_EV,	"rx_drop_events" },
	{ TRX_CTR_DEV_RX_DROP_SMPL,	"rx_drop_samples" },
	{ TRX_CTR_DEV_TX_DROP_EV,	"tx_drop_events" },
	{ TRX_CTR_DEV_TX_DROP_SMPL,	"tx_drop_samples" },
	{ TRX_CTR_TRX_TX_STALE_BURSTS,	"tx_stale_bursts" },
	{ TRX_CTR_TRX_TX_UNAVAILABLE_BURSTS, "tx_unavailable_bursts" },
	{ TRX_CTR_TRX_TRXD_FN_REPEATED,	"tx_trxd_fn_repeated" },
	{ TRX_CTR_TRX_TRXD_FN_OUTOFORDER, "tx_trxd_fn_outoforder" },
	{ TRX_CTR_TRX_TRXD_FN_SKIPPED,	"tx_trxd_fn_skipped" },
	{ TRX_CTR_TRX_RX_EMPTY_BURST,	"rx_empty_burst" },
	{ TRX_CTR_TRX_RX_CLIPPING,	"rx_clipping" },
	{ TRX_CTR_TRX_RX_NO_BURST_DETECTED, "rx_no_burst_detected" },
	{ 0, NULL }
};

static const struct rate_ctr_desc trx_chan_ctr_desc[] = {
	[TRX_CTR_DEV_RX_OVERRUNS]		= { "device:rx_overruns",	"Number of Rx overruns in FIFO queue" },
	[TRX_CTR_DEV_TX_UNDERRUNS]		= { "device:tx_underruns",	"Number of Tx underruns in FIFO queue" },
	[TRX_CTR_DEV_RX_DROP_EV]		= { "device:rx_drop_events",	"Number of times Rx samples were dropped by HW" },
	[TRX_CTR_DEV_RX_DROP_SMPL]		= { "device:rx_drop_samples",	"Number of Rx samples dropped by HW" },
	[TRX_CTR_DEV_TX_DROP_EV]		= { "device:tx_drop_events",	"Number of times Tx samples were dropped by HW" },
	[TRX_CTR_DEV_TX_DROP_SMPL]		= { "device:tx_drop_samples",	"Number of Tx samples dropped by HW" },
	[TRX_CTR_TRX_TX_STALE_BURSTS]		= { "trx:tx_stale_bursts",	"Number of Tx burts dropped by TRX due to arriving too late" },
	[TRX_CTR_TRX_TX_UNAVAILABLE_BURSTS]	= { "trx:tx_unavailable_bursts","Number of Tx burts unavailable (not enqueued) at the time they should be transmitted" },
	[TRX_CTR_TRX_TRXD_FN_REPEATED]		= { "trx:tx_trxd_fn_repeated",	"Number of Tx burts received from TRXD with repeated FN" },
	[TRX_CTR_TRX_TRXD_FN_OUTOFORDER]	= { "trx:tx_trxd_fn_outoforder","Number of Tx burts received from TRXD with a past FN" },
	[TRX_CTR_TRX_TRXD_FN_SKIPPED]		= { "trx:tx_trxd_fn_skipped",	"Number of Tx burts potentially skipped due to FN jumps" },
	[TRX_CTR_TRX_RX_EMPTY_BURST]		= { "trx:rx_empty_burst",	"Number of Rx bursts empty" },
	[TRX_CTR_TRX_RX_CLIPPING]		= { "trx:rx_clipping",		"Number of Rx bursts discarded due to clipping" },
	[TRX_CTR_TRX_RX_NO_BURST_DETECTED]	= { "trx:rx_no_burst_detected",	"Number of Rx burts discarded due to burst detection error" },
};

static const struct rate_ctr_group_desc trx_chan_ctr_group_desc = {
	.group_name_prefix		= "trx:chan",
	.group_description		= "osmo-trx statistics",
	.class_id			= OSMO_STATS_CLASS_GLOBAL,
	.num_ctr			= ARRAY_SIZE(trx_chan_ctr_desc),
	.ctr_desc			= trx_chan_ctr_desc,
};

static int dev_rate_ctr_timerfd_cb(struct osmo_fd *ofd, unsigned int what) {
	size_t chan;
	struct rate_ctr *ctr;
	LOGC(DCTR, INFO) << "Main thread is updating Device counters";
	dev_rate_ctr_mutex.lock();
	for (chan = 0; chan < chan_len; chan++) {
		if (dev_ctrs_pending[chan].chan == PENDING_CHAN_NONE)
			continue;
		LOGCHAN(chan, DCTR, DEBUG) << "rate_ctr update";
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_DEV_RX_OVERRUNS);
		rate_ctr_add(ctr, dev_ctrs_pending[chan].rx_overruns - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_DEV_TX_UNDERRUNS);
		rate_ctr_add(ctr, dev_ctrs_pending[chan].tx_underruns - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_DEV_RX_DROP_EV);
		rate_ctr_add(ctr, dev_ctrs_pending[chan].rx_dropped_events - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_DEV_RX_DROP_SMPL);
		rate_ctr_add(ctr, dev_ctrs_pending[chan].rx_dropped_samples - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_DEV_TX_DROP_EV);
		rate_ctr_add(ctr, dev_ctrs_pending[chan].tx_dropped_events - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_DEV_TX_DROP_SMPL);
		rate_ctr_add(ctr, dev_ctrs_pending[chan].tx_dropped_samples - ctr->current);

		/* Mark as done */
		dev_ctrs_pending[chan].chan = PENDING_CHAN_NONE;
	}
	if (osmo_timerfd_disable(&dev_rate_ctr_timerfd) < 0)
		LOGC(DCTR, ERROR) << "Failed to disable timerfd";
	dev_rate_ctr_mutex.unlock();
	return 0;
}

static int trx_rate_ctr_timerfd_cb(struct osmo_fd *ofd, unsigned int what) {
	size_t chan;
	struct rate_ctr *ctr;
	LOGC(DCTR, INFO) << "Main thread is updating Transceiver counters";
	trx_rate_ctr_mutex.lock();
	for (chan = 0; chan < chan_len; chan++) {
		if (trx_ctrs_pending[chan].chan == PENDING_CHAN_NONE)
			continue;
		LOGCHAN(chan, DCTR, DEBUG) << "rate_ctr update";
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_TRX_TX_STALE_BURSTS);
		rate_ctr_add(ctr, trx_ctrs_pending[chan].tx_stale_bursts - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_TRX_TX_UNAVAILABLE_BURSTS);
		rate_ctr_add(ctr, trx_ctrs_pending[chan].tx_unavailable_bursts - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_TRX_TRXD_FN_REPEATED);
		rate_ctr_add(ctr, trx_ctrs_pending[chan].tx_trxd_fn_repeated - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_TRX_TRXD_FN_OUTOFORDER);
		rate_ctr_add(ctr, trx_ctrs_pending[chan].tx_trxd_fn_outoforder - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_TRX_TRXD_FN_SKIPPED);
		rate_ctr_add(ctr, trx_ctrs_pending[chan].tx_trxd_fn_skipped - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_TRX_RX_EMPTY_BURST);
		rate_ctr_add(ctr, trx_ctrs_pending[chan].rx_empty_burst - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_TRX_RX_CLIPPING);
		rate_ctr_add(ctr, trx_ctrs_pending[chan].rx_clipping - ctr->current);
		ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], TRX_CTR_TRX_RX_NO_BURST_DETECTED);
		rate_ctr_add(ctr, trx_ctrs_pending[chan].rx_no_burst_detected - ctr->current);
		/* Mark as done */
		trx_ctrs_pending[chan].chan = PENDING_CHAN_NONE;
	}
	if (osmo_timerfd_disable(&trx_rate_ctr_timerfd) < 0)
		LOGC(DCTR, ERROR) << "Failed to disable timerfd";
	trx_rate_ctr_mutex.unlock();
	return 0;
}

/* Callback function to be called every time we receive a signal from DEVICE */
static int device_sig_cb(unsigned int subsys, unsigned int signal,
			 void *handler_data, void *signal_data)
{
	struct device_counters *dev_ctr;
	struct trx_counters *trx_ctr;
	/* Delay sched around 20 ms, in case we receive several calls from several
	 * channels batched */
	struct timespec next_sched = {.tv_sec = 0, .tv_nsec = 20*1000*1000};
	/* no automatic re-trigger */
	struct timespec intv_sched = {.tv_sec = 0, .tv_nsec = 0};
	char err_buf[256];

	switch (signal) {
	case S_DEVICE_COUNTER_CHANGE:
		dev_ctr = (struct device_counters *)signal_data;
		LOGCHAN(dev_ctr->chan, DCTR, INFO) << "Received counter change from radioDevice";
		dev_rate_ctr_mutex.lock();
		dev_ctrs_pending[dev_ctr->chan] = *dev_ctr;
		if (osmo_timerfd_schedule(&dev_rate_ctr_timerfd, &next_sched, &intv_sched) < 0) {
			LOGC(DCTR, ERROR) << "Failed to schedule timerfd: " << errno
					  << " = "<< strerror_r(errno, err_buf, sizeof(err_buf));
		}
		dev_rate_ctr_mutex.unlock();
		break;
	case S_TRX_COUNTER_CHANGE:
		trx_ctr = (struct trx_counters *)signal_data;
		LOGCHAN(trx_ctr->chan, DCTR, INFO) << "Received counter change from Transceiver";
		trx_rate_ctr_mutex.lock();
		trx_ctrs_pending[trx_ctr->chan] = *trx_ctr;
		if (osmo_timerfd_schedule(&trx_rate_ctr_timerfd, &next_sched, &intv_sched) < 0) {
			LOGC(DCTR, ERROR) << "Failed to schedule timerfd: " << errno
					  << " = "<< strerror_r(errno, err_buf, sizeof(err_buf));
		}
		trx_rate_ctr_mutex.unlock();
		break;
	default:
		break;
	}
	return 0;
}

/************************************
 * ctr_threshold  APIs
 ************************************/
static const char* ctr_threshold_2_vty_str(struct ctr_threshold *ctr)
{
	static char buf[256];
	int rc = 0;
	rc += snprintf(buf, sizeof(buf), "ctr-error-threshold %s", get_value_string(trx_chan_ctr_names, ctr->ctr_id));
	rc += snprintf(buf + rc, sizeof(buf) - rc, " %d %s", ctr->val, get_value_string(rate_ctr_intv, ctr->intv));
	return buf;
}

static void threshold_timer_cb(void *data)
{
	struct ctr_threshold *ctr_thr;
	struct rate_ctr *rate_ctr;
	size_t chan;
	LOGC(DCTR, DEBUG) << "threshold_timer_cb fired!";

	llist_for_each_entry(ctr_thr, &threshold_list, list) {
		for (chan = 0; chan < chan_len; chan++) {
			rate_ctr = rate_ctr_group_get_ctr(rate_ctrs[chan], ctr_thr->ctr_id);
			LOGCHAN(chan, DCTR, INFO) << "checking threshold: " << ctr_threshold_2_vty_str(ctr_thr)
						   << " ("<< rate_ctr->intv[ctr_thr->intv].rate << " vs " << ctr_thr->val << ")";
			if (rate_ctr->intv[ctr_thr->intv].rate >= ctr_thr->val) {
				LOGCHAN(chan, DCTR, FATAL) << "threshold reached, stopping! " << ctr_threshold_2_vty_str(ctr_thr)
							   << " ("<< rate_ctr->intv[ctr_thr->intv].rate << " vs " << ctr_thr->val << ")";
				osmo_signal_dispatch(SS_MAIN, S_MAIN_STOP_REQUIRED, NULL);
				return;
			}
		}
	}
	osmo_timer_schedule(&threshold_timer, threshold_timer_sched_secs, 0);
}

static size_t ctr_threshold_2_seconds(struct ctr_threshold *ctr)
{
	size_t mult = 0;
	switch (ctr->intv) {
	case RATE_CTR_INTV_SEC:
		mult = 1;
		break;
	case RATE_CTR_INTV_MIN:
		mult = 60;
		break;
	case RATE_CTR_INTV_HOUR:
		mult = 60*60;
		break;
	case RATE_CTR_INTV_DAY:
		mult = 60*60*24;
		break;
	default:
		OSMO_ASSERT(false);
	}
	return mult;
}

static void threshold_timer_update_intv() {
	struct ctr_threshold *ctr, *min_ctr;
	size_t secs, min_secs;

	/* Avoid scheduling timer until itself and other structures are prepared
	   by trx_rate_ctr_init */
	if (!threshold_initied)
		return;

	if (llist_empty(&threshold_list)) {
		osmo_timer_del(&threshold_timer);
		return;
	}

	min_ctr = llist_first_entry(&threshold_list, struct ctr_threshold, list);
	min_secs = ctr_threshold_2_seconds(min_ctr);

	llist_for_each_entry(ctr, &threshold_list, list) {
		secs = ctr_threshold_2_seconds(ctr);
		if (min_secs > secs)
			min_secs = secs;
	}


	threshold_timer_sched_secs = OSMO_MAX((int)(min_secs / 2 - 1), 1);
	LOGC(DCTR, INFO) << "New ctr-error-threshold check interval: "
			  << threshold_timer_sched_secs << " seconds";
	osmo_timer_schedule(&threshold_timer, threshold_timer_sched_secs, 0);
}

/* Init rate_ctr subsystem. Expected to be called during process start by main thread before VTY is ready */
void trx_rate_ctr_init(void *ctx, struct trx_ctx* trx_ctx)
{
	size_t  i;
	trx_rate_ctr_ctx = ctx;
	chan_len = trx_ctx->cfg.num_chans;
	dev_ctrs_pending = (struct device_counters*) talloc_zero_size(ctx, chan_len * sizeof(struct device_counters));
	trx_ctrs_pending = (struct trx_counters*) talloc_zero_size(ctx, chan_len * sizeof(struct trx_counters));
	rate_ctrs = (struct rate_ctr_group**) talloc_zero_size(ctx, chan_len * sizeof(struct rate_ctr_group*));

	for (i = 0; i < chan_len; i++) {
		dev_ctrs_pending[i].chan = PENDING_CHAN_NONE;
		trx_ctrs_pending[i].chan = PENDING_CHAN_NONE;
		rate_ctrs[i] = rate_ctr_group_alloc(ctx, &trx_chan_ctr_group_desc, i);
		if (!rate_ctrs[i]) {
			LOGCHAN(i, DCTR, ERROR) << "Failed to allocate rate ctr";
			exit(1);
		}
	}
	dev_rate_ctr_timerfd.fd = -1;
	if (osmo_timerfd_setup(&dev_rate_ctr_timerfd, dev_rate_ctr_timerfd_cb, NULL) < 0) {
		LOGC(DCTR, ERROR) << "Failed to setup timerfd";
		exit(1);
	}
	trx_rate_ctr_timerfd.fd = -1;
	if (osmo_timerfd_setup(&trx_rate_ctr_timerfd, trx_rate_ctr_timerfd_cb, NULL) < 0) {
		LOGC(DCTR, ERROR) << "Failed to setup timerfd";
		exit(1);
	}
	osmo_signal_register_handler(SS_DEVICE, device_sig_cb, NULL);

	/* Now set up threshold checks */
	threshold_initied = true;
	osmo_timer_setup(&threshold_timer, threshold_timer_cb, NULL);
	threshold_timer_update_intv();
}

void trx_rate_ctr_threshold_add(struct ctr_threshold *ctr)
{
	struct ctr_threshold *new_ctr;

	new_ctr = talloc_zero(trx_rate_ctr_ctx, struct ctr_threshold);
	*new_ctr = *ctr;
	LOGC(DCTR, NOTICE) << "Adding new threshold check: " << ctr_threshold_2_vty_str(new_ctr);
	llist_add(&new_ctr->list, &threshold_list);
	threshold_timer_update_intv();
}

int trx_rate_ctr_threshold_del(struct ctr_threshold *del_ctr)
{
	struct ctr_threshold *ctr;

	llist_for_each_entry(ctr, &threshold_list, list) {
		if (ctr->intv != del_ctr->intv ||
		    ctr->ctr_id != del_ctr->ctr_id ||
		    ctr->val != del_ctr->val)
			continue;

		LOGC(DCTR, NOTICE) << "Deleting threshold check: " << ctr_threshold_2_vty_str(del_ctr);
		llist_del(&ctr->list);
		talloc_free(ctr);
		threshold_timer_update_intv();
		return 0;
	}
	return -1;
}

void trx_rate_ctr_threshold_write_config(struct vty *vty, char *indent_prefix)
{
	struct ctr_threshold *ctr;

	llist_for_each_entry(ctr, &threshold_list, list) {
		vty_out(vty, "%s%s%s", indent_prefix, ctr_threshold_2_vty_str(ctr), VTY_NEWLINE);
	}
}