import { useEffect, useMemo, useState } from 'react';

import { dateTime, PanelData, TimeRange } from '@grafana/data';
import { DataQuery } from '@grafana/schema';

import { cloneDeep } from 'lodash';

import { DBScanConfig, OutlierResults } from 'api/types';
import { DBScan, DBScanData } from 'components/OutlierAlgorithms/dbscan';
import { MAD, MADData, MADDoubleMedianData } from 'components/OutlierAlgorithms/mad';

import { useOwnQueryResult } from './useOwnQueryResult';

// Placeholder query list handed to the low-resolution sample fetch when no real sample
// is required: a single hidden query with an empty expression, so it can be run quickly.
const NOOPDataQuery = [
  {
    refId: 'Z',
    hide: true,
    expr: '',
  },
];

// Inclusive lower bound on the number of series needed to attempt outlier detection.
const MIN_NUMBER_OF_TIME_SERIES = 3;
// Series-count ceiling used when validating the aligned data frame.
const MAX_NUMBER_OF_TIME_SERIES = 1000;

/**
 * useQueryOutlierResult - is a hook that runs either DBSCAN or MAD depending on `algorithm`
 *
 * Each algorithm has different requirements which are combined inside this hook. Due to the
 * limitations of React hooks (cannot include them in `if` statements), the data flow of this
 * is slightly peculiar: a second "sample" query is always issued through `useOwnQueryResult`,
 * but it is swapped for a NO-OP query whenever the sample is not needed.
 *
 * MAD
 * ===
 * To match backend, MAD needs at least 24 hours of historical data to calculate a
 * suitable global median. If the user's time range does not satisfy
 * `timeRangeAtLeast24Hours`, this code performs a second low resolution query for the
 * 24 hours ending at `timeRange.to` in order to calculate the lower/global/upper medians
 * and pass them to the MAD implementation.
 * Otherwise NaN medians are passed so that MAD calculates them internally based
 * solely on the data it receives (and the low-res query is performed on a NO-OP query).
 *
 * DBSCAN
 * ===
 * DBScan relies on a hyperparameter 'eps' that is the minimum inter-point distance
 * to consider those points in a cluster. To map `sensitivity` to this `eps` value
 * requires the data span (i.e. max-min) of the data over a period of time.
 * To obtain this, perform the Query against the datasource with a 7 day time range
 * (ending at the current wall-clock time) at a lower resolution and analyse the resulting data.
 *
 * @param alignedData - panel data to analyse; only `series[0]` is used, with its first field
 *   treated as time and every remaining field counted as one series.
 * @param queries - the user's queries; re-run for the low-resolution sample when one is needed.
 * @param timeRange - the user's selected time range.
 * @param timeZone - forwarded unchanged to `useOwnQueryResult`.
 * @param datasourceUid - forwarded unchanged to `useOwnQueryResult`.
 * @param algorithm - `'dbscan'` selects DBSCAN; any other value follows the MAD code path
 *   (though only the exact value `'mad'` enables the NO-OP sample-query shortcut).
 * @param sensitivity - passed directly to `MAD.run`, or mapped to an epsilon for `DBScan.run`.
 * @returns a tuple of:
 * - OutlierResults?: results of the applied algorithm to the data
 * - DBScanConfig?: optional config (the computed epsilon) to pass to backend; undefined for MAD
 * - string?: an error message when the data could not be processed, otherwise undefined
 */
export function useQueryOutlierResult(
  alignedData: PanelData | undefined,
  queries: Array<DataQuery & { expr?: string | undefined }>,
  timeRange: TimeRange,
  timeZone: string,
  datasourceUid: string,
  algorithm: string,
  sensitivity: number
): [OutlierResults | undefined, DBScanConfig | undefined, string | undefined] {
  // The three pieces of state that make up the hook's return value.
  const [errorState, setErrorState] = useState<string | undefined>(undefined);
  const [outlierResult, setOutlierResult] = useState<OutlierResults | undefined>(undefined);
  const [outlierConfig, setOutlierConfig] = useState<DBScanConfig | undefined>(undefined);

  // DBScan needs additional info about the data to work best. DBScan 'eps'
  // value depends on the data span (i.e. max-min) of the data over a period of time.
  // Stays undefined until the sample query has produced series for DBSCAN.
  const [dataSpan, setDataSpan] = useState<number | undefined>(undefined);

  // To match backend, MAD needs at least 24 hours of historical data to calculate a
  // suitable global, lower and upper median. NaN is used as the "not available" value.
  const [dataDoubleMedian24Hour, setDataDoubleMedian24Hour] = useState<MADDoubleMedianData>({
    lowerMedian: NaN,
    globalMedian: NaN,
    upperMedian: NaN,
  });

  // Fetching at least 24 hours of data to estimate the global median for MAD
  // DBSCAN wants 7 days worth.
  // For DBSCAN the window ends "now"; for MAD it ends at the user's `timeRange.to`.
  const sampleDataTimeRange: TimeRange = useMemo(() => {
    const amountDays = algorithm === 'dbscan' ? 7 : 1;
    const to = algorithm === 'dbscan' ? dateTime(Date.now()) : timeRange.to;
    // Clone before subtracting so that `to` (possibly the caller's `timeRange.to`) is not modified.
    const from = cloneDeep(to).subtract(amountDays, 'day');

    return {
      to: to,
      from: from,
      raw: {
        to: to,
        from: from,
      },
    };
  }, [timeRange, algorithm]);

  // Run query again to fetch low-resolution data required by each algorithm. Note if
  // using MAD and the incoming time range already covers 24 hours, can use NO-OP query to
  // save work. The NO-OP query is also used whenever an error is currently recorded.
  //
  // Note!!! Typically you'd use `useQueryResult` to perform this request. However
  // there is a race condition with `useQueryResult` where the data returned by this query
  // is passed to the other useQueryResult output!! It appears the problem may lie in
  // `useQueryResult` using a singleton `QueryRunner`.
  //
  // To avoid this race bug, have made a simplified querier that uses its own `QueryRunner`
  //////////////////////////////////
  const [sampleData] = useOwnQueryResult(
    (algorithm === 'mad' && timeRangeAtLeast24Hours(timeRange)) || errorState !== undefined ? NOOPDataQuery : queries,
    1000, // hopefully enough data to be decent sample of the whole dataset
    sampleDataTimeRange,
    timeZone,
    datasourceUid
  );

  // Process the low-resolution data - each algorithm does its own thing.
  useEffect(() => {
    // Any previously published DBSCAN config is cleared whenever the sample (or algorithm) changes.
    // NOTE(review): if the span recomputed below equals the previous `dataSpan`, the results
    // effect further down would not re-run (none of its deps change), so the config cleared
    // here may stay undefined while `outlierResult` is still set — confirm whether this can occur.
    setOutlierConfig(undefined);
    const series = sampleData?.series;
    if (series === undefined) {
      // No sample available: reset both algorithm-specific derived values.
      setDataSpan(undefined);
      setDataDoubleMedian24Hour({ lowerMedian: NaN, globalMedian: NaN, upperMedian: NaN });
    } else {
      if (algorithm === 'dbscan') {
        setDataSpan(DBScan.getDataSpan(series));
      } else {
        // Process Dataframe list to get raw data and calculate median
        // Only the second field of each frame is used (presumably field 0 is time — confirm);
        // frames without a second field are skipped.
        const rawSampleData = [];
        for (const s of series) {
          if (s.fields[1] !== undefined) {
            rawSampleData.push(s.fields[1].values);
          }
        }

        const last24HourDoubleMedian = MAD.computeDoubleMedian(rawSampleData);
        setDataDoubleMedian24Hour(last24HourDoubleMedian);
      }
    }
  }, [sampleData, algorithm]);

  // Do as much work as possible without applying `sensitivity` - validate the data and preprocess
  // Yields the algorithm-specific preprocessed data, or null when validation/preprocessing fails.
  // NOTE(review): this memo callback calls state setters, i.e. it has side effects during
  // render; React documents `useMemo` calculations as expected to be pure — confirm this is intended.
  const preprocessedData = useMemo(() => {
    // Drop any stale result and refresh the error state from the incoming data first.
    setOutlierResult(undefined);
    const error = alignedData?.errors?.[0];
    setErrorState(error?.message);
    if (error !== undefined) {
      return null;
    }
    if (alignedData?.series === undefined || alignedData.series.length === 0) {
      return null;
    }

    // Only the first frame is analysed.
    const alignedDataFrame = alignedData.series[0];
    if (alignedDataFrame === null || alignedDataFrame.fields === undefined) {
      return null;
    }

    // Check if not enough or too many series to even try, and abort if so.
    const frameCount = alignedDataFrame.fields.length - 1; // as frame 0 is time
    // NOTE(review): `>=` means exactly MAX_NUMBER_OF_TIME_SERIES series is rejected as well.
    if (frameCount >= MAX_NUMBER_OF_TIME_SERIES) {
      setErrorState(
        `Too many series: Outlier detection has an upper limit of ${MAX_NUMBER_OF_TIME_SERIES} series. Please change your query or use a different metric.`
      );
      setOutlierResult(undefined);
      return null;
    } else if (frameCount < MIN_NUMBER_OF_TIME_SERIES) {
      // NOTE(review): the message hard-codes "3" rather than using MIN_NUMBER_OF_TIME_SERIES.
      setErrorState(
        'Not enough series: Outlier detection requires 3 or more series to work. Please change your query or use a different metric.'
      );
      setOutlierResult(undefined);
      return null;
    }

    try {
      if (algorithm === 'dbscan') {
        return DBScan.preprocess(alignedDataFrame);
      } else {
        // MAD has no backend config, so make sure none is published.
        setOutlierConfig(undefined);
        // If data spans at least 24 hours, let MAD calculate the median itself internally. Otherwise pass in the
        // 24 hour median that was calculated above.
        const dataDoubleMedian = timeRangeAtLeast24Hours(timeRange)
          ? { lowerMedian: NaN, globalMedian: NaN, upperMedian: NaN }
          : dataDoubleMedian24Hour;
        return MAD.preprocess(alignedDataFrame, dataDoubleMedian);
      }
    } catch (e) {
      // Below makes Typescript happy with catching Exceptions
      // Anything that is neither a string nor an Error leaves `message` empty.
      let message = '';
      if (typeof e === 'string') {
        message = e;
      } else if (e instanceof Error) {
        message = e.message;
      }
      setErrorState(`${message}. Please change your query or use a different algorithm.`);
      setOutlierResult(undefined);
      return null;
    }
  }, [alignedData, timeRange, algorithm, dataDoubleMedian24Hour]);

  // Apply `sensitivity` to the preprocessed data and publish the result (and, for DBSCAN, the config).
  useEffect(() => {
    // Results depend *only* on sensitivity and the preprocessed data. Changes to other states
    // will be captured by `preprocessedData`
    // Bail out (leaving current state untouched) until every required input is available.
    // NOTE(review): `sensitivity` is typed `number`, so the null check is purely defensive.
    if (
      preprocessedData === null ||
      alignedData?.series === undefined ||
      sensitivity === null ||
      (algorithm === 'dbscan' && dataSpan === undefined)
    ) {
      return;
    }

    let results: OutlierResults;

    if (algorithm === 'dbscan') {
      const SENSITIVITY_PADDING = 1.1;
      // trim sensitivity to avoid epsilon being 0
      // why 1e-3? any lower (e.g. 1e-6), the epsilon is so small that everything is an outlier
      const sensitivityTrimmed = Math.min(sensitivity, 1 - 1e-3);
      const MIN_VALUE_FLOAT32 = 1.1754943508222875e-38; // use instead of Number.MIN_VALUE, that one gets converted to 0 in mlapi (float32)
      // epsilon = (1 - sensitivity) * dataSpan * padding, floored at the smallest float32 value.
      // The `?? 0.1` fallback is not reachable here because the guard above requires `dataSpan` for DBSCAN.
      const epsilon = Math.max((1 - sensitivityTrimmed) * (dataSpan ?? 0.1) * SENSITIVITY_PADDING, MIN_VALUE_FLOAT32); // zero eps will fail so avoid.
      setOutlierConfig({ epsilon });
      results = DBScan.run(preprocessedData as DBScanData, epsilon);
    } else {
      results = MAD.run(preprocessedData as MADData, sensitivity);
    }

    setOutlierResult(results);
    // `alignedData` and `algorithm` are deliberately left out of the deps (see comment at the top of this effect).
  }, [preprocessedData, sensitivity, dataSpan]); // eslint-disable-line react-hooks/exhaustive-deps

  return [outlierResult, outlierConfig, errorState];
}

/**
 * Reports whether `timeRange` covers at least 24 hours.
 *
 * The comparison is inclusive: a range of exactly 24 hours (e.g. a "last 24 hours"
 * selection) already contains the full day of history, so it counts as sufficient.
 * Durations are compared at whole-second resolution via `unix()`.
 *
 * @param timeRange - the range to measure, from `timeRange.from` to `timeRange.to`.
 * @returns true when `to - from` is 24 hours or more, false otherwise.
 */
function timeRangeAtLeast24Hours(timeRange: TimeRange): boolean {
  const SECONDS_PER_DAY = 3600 * 24;
  return timeRange.to.unix() - timeRange.from.unix() >= SECONDS_PER_DAY;
}
