import JSZip from 'jszip';
import Pick from 'stream-json/filters/Pick';
import { streamValues } from 'stream-json/streamers/StreamValues';
import { chain } from 'stream-chain';
import pako from 'pako';

// Number of bytes sliced off a zip entry blob for each write into the JSON
// pipeline built by extractObjects.
const readBuffer = 2 ** 10 * 32; // 32KB at a time
// Sentinel ints of the grove stream. extractGroveModels requires GROVE_HEADER to
// be the first int it decodes; VARIABLE_HEADER is not referenced in the code
// visible here.
const groveHeaders = {
  GROVE_HEADER   : 1000,
  VARIABLE_HEADER: 1001
};

// Model type codes. extractGroveModels compares these against the int stored
// under the RECENT_MODEL_TYPE key and maps each one to a model type string.
const M_SINGLE_TREE = 0;
const M_MART = 4;
const M_MARS = 5;
const M_RANDOM_FOREST = 7;
const M_LOGIT = 15;
const M_GPS = 17;
const M_OLS_REGRESSION = 18;
const M_PIPELINE_TN_ISLE = 20;
const M_PIPELINE_TN_RULESEEKER = 21;
const M_PIPELINE_OTHER = 22;

// Keys looked up in the key/value tables decoded by extractGroveModels:
// RECENT_MODEL_TYPE in the int table, the two VARLIST_* keys in the int-vector table.
const RECENT_MODEL_TYPE = 10821;
// The first element of this vector is used as the target variable.
const VARLIST_TARGET = 10661;
const VARLIST_LF = 10658; // this is for numeric variables forced to be categorical

// Exported role identifiers; not referenced by the other code visible in this file.
export const DeploymentRoles = {
  Champion  : 'champion',
  Challenger: 'challenger'
};

// Values of a project command's `Status` field. extractMSSProjectModels flags a
// model as out of date when its command is missing or its status is not UpToDate.
const CommandStatus = {
  UpToDate   : 0,
  DataChanged: 1,
  DataDeleted: 2
};

/**
 * Streams one JSON entry of a zip archive through a stream-json pipeline and picks
 * out selected sub-trees, feeding the parser `readBuffer` bytes at a time and
 * stopping as soon as every requested sub-tree has been seen.
 *
 * @param {object} zip - loaded JSZip archive.
 * @param {string} path - name of the JSON entry inside the archive.
 * @param {string[]} json_filter - '/'-separated path prefixes to pick.
 * @returns {Promise<Array>} picked values, each stored at the index of the filter
 *   it matched; filters that never matched leave a hole (`undefined` when
 *   destructured). Rejects when the entry does not exist, when reading it fails,
 *   or when the pipeline errors before every filter has produced a value.
 */
const extractObjects = (zip, path, json_filter) =>
  new Promise((resolve, reject) => {
    const entry = zip.file(path);
    if (!entry) {
      reject(new Error(`Archive entry not found: ${path}`));
      return;
    }

    // order[n] is the json_filter index matched by the n-th value the filter accepted
    const order = [];
    const results = [];
    let settled = false;
    const foundAll = () => Object.keys(results).length === json_filter.length;
    const settle = (callback, value) => {
      settled = true;
      callback(value);
    };

    const pipeline = chain([
      Pick.withParser({
        pathSeparator: '/',
        filter       : stack => {
          const stack_path = stack.join('/');
          const check = json_filter.findIndex(x => stack_path.startsWith(x));
          if (check !== -1) {
            order.push(check);
            return true;
          }
          return false;
        }
      }),
      streamValues()
    ]);
    pipeline.on('data', ({ key, value }) => {
      results[order[key]] = value;
    });
    pipeline.on('end', () => settle(resolve, results));
    pipeline.on('error', err => {
      if (foundAll()) {
        // we found what we were looking for so we just resolve
        // we can get errors because we might have stopped feeding json and the parser thinks we are invalid
        settle(resolve, results);
        return;
      }
      // without this the caller would wait forever on a promise that never settles
      settle(reject, err);
    });

    const feed = async () => {
      const blob = await entry.async('blob');
      let read = 0;
      while (!settled && read < blob.size && !foundAll()) {
        const buffer = blob.slice(read, read + readBuffer);
        read += buffer.size;
        pipeline.input.write(new Uint8Array(await buffer.arrayBuffer()));
      }
      if (!settled) {
        pipeline.input.end();
      }
    };
    feed().catch(reject);
  });

/**
 * Lists the models stored in an MSS project archive.
 *
 * @param {Blob|File|ArrayBuffer} file - the project archive, handed to JSZip.loadAsync.
 * @returns {Promise<object[]|undefined>} one descriptor per model; `[]` when the
 *   project metadata has no `Models` section; `undefined` when the archive has no
 *   project metadata entry or when anything throws (the error is logged).
 */
export const extractMSSProjectModels = async file => {
  try {
    const project = await JSZip.loadAsync(file);
    if (!project) {
      return undefined;
    }
    // newer metadata entry name first, older one as fallback
    const metadataFile = ['/project_metadata_20.json', '/project_metadata.json'].find(name => project.file(name));
    if (!metadataFile) {
      return undefined;
    }
    const [Commands, Models] = await extractObjects(project, metadataFile, ['Commands', 'Models']);
    if (!Models) {
      return [];
    }

    // Builds the descriptor of a single model from its own JSON entry.
    const describeModel = async ({ Id, Data }) => {
      const modelFile = `${Data}.json`;
      const [metadata, variables] = await extractObjects(project, modelFile, ['Model/metadata', 'Model/variables']);

      // keep only output groups whose owning command carries a titled first output group
      const trees = metadata.output_group.flatMap(group => {
        const owner = Commands.Items.find(({ Id: commandId }) => commandId === group.output_group_id) || {};
        const { OutputGroups: [{ Title }] = [{}] } = owner;
        return Title ? [{ id: group.pruning_id?.toString(), name: Title }] : [];
      });

      const modelType = metadata.model_type;
      const isLogistic = modelType === 'binary_logistic_regression';
      const isEventTrial = Boolean(variables.event_column);
      const isGzlmNonEventTrial = isLogistic && !isEventTrial;

      let originalThreshold;
      if (isGzlmNonEventTrial) {
        [originalThreshold] = await extractObjects(project, modelFile, [
          'Model/model_information/input_state/binary_response_model_data/binary_threshold'
        ]);
      }

      const owningCommand = Commands.Items.find(({ Id: commandId }) => commandId === metadata.command_id);
      const [firstTree] = trees;

      const model = {
        type            : modelType,
        id              : Id.toString(),
        source          : 'mss-project-model',
        treeId          : firstTree?.id || undefined,
        trees,
        supported       : Boolean(modelTypeMap[modelType]) && !(isLogistic && isEventTrial),
        name            : firstTree?.name || '',
        commandOutOfDate: !(owningCommand && owningCommand.Status === CommandStatus.UpToDate),
        isEventTrial
      };
      if (isGzlmNonEventTrial) {
        model.originalThreshold = originalThreshold;
      }
      return model;
    };

    const pending = Models.Items.flatMap(({ Items }) => Items.map(item => describeModel(item)));
    return await Promise.all(pending);
  } catch (e) {
    console.error(e);
  }
};

// Decoder descriptors for readString inside extractGroveModels: `decoder` turns the
// raw bytes into text and `bytes` is the per-character width that the string's
// length prefix is multiplied by.
// NOTE(review): 'ascii' and 'utf-16' are WHATWG encoding labels; per the Encoding
// Standard they select windows-1252 and UTF-16LE respectively - confirm this
// matches what the grove writer emits.
const asciiDecoder = { decoder: new TextDecoder('ascii'), bytes: 1 };
const utf16Decoder = { decoder: new TextDecoder('utf-16'), bytes: 2 };

/**
 * Does a shallow client-side inspection of an SPM grove file and reports the model
 * it contains.
 *
 * The file starts with three native int32 values (int size, compression method,
 * encryption marker) followed by length-prefixed chunks that are optionally zlib
 * compressed. The decoded chunks form one continuous stream of little-endian
 * ints, floats, doubles and length-prefixed strings.
 *
 * @param {Blob} file - the grove file; only `slice()` and `arrayBuffer()` are used.
 * @returns {Promise<Array<{id: string, name: string, supported: boolean, type: string}>|undefined>}
 *   a single-element array describing the model, or `undefined` when the file is
 *   not a grove this reader understands (bad header, unsupported compression or
 *   encryption, version not above 8.3.0, missing session options, no recognised model).
 * @throws {Error} when the chunk stream is truncated or a size does not fit in an int32.
 */
export const extractGroveModels = async file => {
  let fileCurrent = 12;
  const headerBuffer = await file.slice(0, fileCurrent).arrayBuffer();
  if (headerBuffer.byteLength < fileCurrent) {
    // too short to hold the three header ints, so it cannot be a grove
    return;
  }
  const [intSize, compressionMethod, encryption] = new Int32Array(headerBuffer);
  if (intSize !== 1 && intSize !== 2) {
    return;
  }
  // we support no compress and zlib compress, punt on method 1 which is LZ_COMPRESSION
  if (compressionMethod !== 0 && compressionMethod !== 2) {
    return;
  }
  // I don't think we need to worry about this, but at one point in time SPM had encryption;
  // even the latest major releases haven't supported it
  if (encryption !== 10) {
    return;
  }
  const eightByteInt = intSize === 2;
  const chunkSizeBytes = eightByteInt ? 8 : 4;

  // Reads the next length-prefixed chunk from the file and returns its decoded bytes.
  const readNextChunk = async () => {
    const sizeBuffer = await file.slice(fileCurrent, fileCurrent + chunkSizeBytes).arrayBuffer();
    if (sizeBuffer.byteLength < chunkSizeBytes) {
      throw new Error('Unexpected end of grove file');
    }
    fileCurrent += chunkSizeBytes;
    const [chunkSize, notSupported = 0] = new Int32Array(sizeBuffer);
    if (notSupported !== 0) {
      throw new Error('Beyond int32');
    }
    if (chunkSize < 0) {
      // a negative size would move the file cursor backwards
      throw new Error('Invalid grove chunk size');
    }
    const buffer = await file.slice(fileCurrent, fileCurrent + chunkSize).arrayBuffer();
    fileCurrent += chunkSize;
    const bytes = new Uint8Array(buffer);
    return compressionMethod === 2 ? pako.inflate(bytes) : bytes;
  };

  let chunk = new Uint8Array(0);
  let chunkCurrent = 0;
  // Returns a DataView over the next `size` bytes of the decoded stream.
  const getDataView = async size => {
    if (chunkCurrent === chunk.byteLength) {
      chunk = await readNextChunk();
      chunkCurrent = 0;
    }
    if (chunkCurrent + size <= chunk.byteLength) {
      const view = new DataView(chunk.buffer, chunk.byteOffset + chunkCurrent, size);
      chunkCurrent += size;
      return view;
    }
    // The request crosses one or more chunk boundaries: stitch the pieces together.
    // The position inside each chunk is tracked from that chunk's own length, so
    // chunks of different sizes are handled correctly.
    const joined = new Uint8Array(size);
    let filled = 0;
    while (filled < size) {
      if (chunkCurrent === chunk.byteLength) {
        chunk = await readNextChunk();
        chunkCurrent = 0;
      }
      const take = Math.min(size - filled, chunk.byteLength - chunkCurrent);
      joined.set(chunk.subarray(chunkCurrent, chunkCurrent + take), filled);
      filled += take;
      chunkCurrent += take;
    }
    return new DataView(joined.buffer);
  };

  const readInt = eightByteInt
    ? async () => {
      const view = await getDataView(8);
      if (view.getBigInt64) {
        // BigInt support
        return Number(view.getBigInt64(0, true));
      }
      // no BigInt: accept only values whose upper half is a plain sign extension
      const upper = view.getInt32(4, true);
      if (upper !== -1 && upper !== 0) {
        throw new Error('Beyond int32');
      }
      return view.getInt32(0, true);
    }
    : async () => Number((await getDataView(4)).getInt32(0, true));
  const readFloat = async () => Number((await getDataView(4)).getFloat32(0, true));
  const readDouble = async () => Number((await getDataView(8)).getFloat64(0, true));
  // Strings are a character count followed by `bytes` bytes per character.
  const readString = async ({ decoder, bytes } = utf16Decoder) => {
    const length = await readInt();
    if (length === 0) {
      return '';
    }
    return decoder.decode(await getDataView(length * bytes));
  };
  const readKeyValue = async (readKey, readValue) => ({ key: await readKey(), value: await readValue() });
  // Elements are each preceded by a 0 marker; any other int ends the array.
  const readTerminatedArray = async readElement => {
    const values = [];
    while ((await readInt()) === 0) {
      values.push(await readElement());
    }
    return values;
  };
  const readFixedArray = async (count, readElement) => {
    const values = [];
    while (count-- > 0) {
      values.push(await readElement());
    }
    return values;
  };
  // A vector is an element count followed by that many elements.
  const readVector = async readElement => readFixedArray(await readInt(), readElement);

  if ((await readInt()) !== groveHeaders.GROVE_HEADER) {
    // grove header sentinel
    return;
  }
  const [major, minor, patch] = (await readString(asciiDecoder)).split('.').map(x => +x);
  const version = (major * 10 + minor) * 10 * 1000 + patch; // more than 10 minors or patches and this would be a real issue
  if (version <= 830000) {
    // assume spm 8+ grove
    return;
  }
  await readString(); // another version in wide chars
  if (!(await readInt())) {
    console.error('Expected session options');
    return;
  }
  // The reads below must stay in exactly this order: it mirrors the layout of the stream.
  const [variableLabels] = await readFixedArray(6, () => readTerminatedArray(() => readKeyValue(readString, readInt)));
  await readTerminatedArray(() => readKeyValue(readInt, readFloat));
  await readTerminatedArray(() => readKeyValue(readInt, readDouble));
  const ints = await readTerminatedArray(() => readKeyValue(readInt, readInt));
  await readTerminatedArray(() => readKeyValue(readInt, readInt));
  await readTerminatedArray(() => readKeyValue(readInt, readString));
  await readTerminatedArray(() => readKeyValue(readInt, () => readVector(readFloat)));
  await readTerminatedArray(() => readKeyValue(readInt, () => readVector(readDouble)));
  const intVectors = await readTerminatedArray(() => readKeyValue(readInt, () => readVector(readInt)));
  await readTerminatedArray(() => readKeyValue(readInt, () => readVector(readInt)));
  await readTerminatedArray(() => readKeyValue(readInt, () => readVector(readInt)));
  await readFixedArray(3, () => readTerminatedArray(readString));
  const [cart, treenet, mars, , rf, , , , , , , , , , linear_regression] = await readFixedArray(
    version > 820704 ? 28 : 27,
    readInt
  );
  if (cart + treenet + mars + rf + linear_regression < 1) {
    return;
  }
  let type = '';
  let supported = false;

  const targetList = intVectors.find(({ key }) => key === VARLIST_TARGET);
  if (!targetList) {
    return [
      {
        id  : '1',
        name: '',
        supported,
        type
      }
    ];
  }
  const {
    value: [target]
  } = targetList;
  // a missing entry is reported below as an unknown type instead of throwing
  const model_type = ints.find(({ key }) => key === RECENT_MODEL_TYPE)?.value;
  // no forced-categorical list simply means no variable was forced
  const forcedCategorical = intVectors.find(({ key }) => key === VARLIST_LF)?.value ?? [];
  const classification =
    forcedCategorical.includes(target) ||
    variableLabels.find(({ key, value }) => value === target && key.endsWith('$'));
  // Note: most models are indicated as "supported" here, since the client side can
  // only do a little digging into the grove. A more in-depth check of the model type
  // will be done on the server side with the SPM GVALIDATE command.
  switch (model_type) {
    case M_SINGLE_TREE:
      type = classification ? 'classification_tree' : 'regression_tree';
      supported = true;
      break;
    case M_MARS:
      type = classification ? 'classification_mars' : 'regression_mars';
      supported = true;
      break;
    case M_RANDOM_FOREST:
      type = classification ? 'classification_random_forest' : 'regression_random_forest';
      supported = true;
      break;
    case M_MART:
    case M_PIPELINE_TN_ISLE:
    case M_PIPELINE_TN_RULESEEKER:
    case M_PIPELINE_OTHER:
      type = classification ? 'classification_treenet' : 'regression_treenet';
      supported = true;
      break;
    case M_GPS:
      type = classification ? 'classification_gps' : 'regression_gps';
      supported = true;
      break;
    case M_LOGIT:
      // technically could be logit, probit or both see session item LOGIT_CONTROLS_LPMODE
      type = 'logit';
      supported = true;
      break;
    case M_OLS_REGRESSION:
      type = 'regression';
      supported = true;
      break;
    default:
      console.error(`Unknown type ${model_type}`);
      break;
  }
  return [
    {
      id  : '1',
      name: '',
      supported,
      type
    }
  ];
};

/**
 * Lists the column names of one worksheet stored in an MSS project archive.
 *
 * Columns without a `WorksheetVarBody` and columns whose `Format.Key` is 0 are
 * left out. Unnamed columns get a generated `Column_<VarId>` name, suffixed with
 * `-T` when the column has text data or `-D` when its format carries a `TimeFmt`.
 *
 * @param {Blob|File|ArrayBuffer} file - the project archive, handed to JSZip.loadAsync.
 * @param {number} [sheetIndex=0] - index used to build the sheet entry path.
 * @returns {Promise<string[]|undefined>} the column names; `[]` when the sheet has
 *   no `Data` section; `undefined` when the sheet entry is missing or when anything
 *   throws (the error is logged).
 */
export const extractMSSWorksheetColumnNames = async (file, sheetIndex = 0) => {
  try {
    const project = await JSZip.loadAsync(file);
    if (!project) {
      return undefined;
    }
    const sheetPath = `/sheets/${sheetIndex}/sheet.json`;
    if (!project.file(sheetPath)) {
      return undefined;
    }
    const [Data] = await extractObjects(project, sheetPath, ['Data']);
    if (!Data) {
      return [];
    }

    // Name generated for a column that has no usable name of its own.
    const fallbackName = ({ VarId, WorksheetVarBody: body }) => {
      let suffix = '';
      if (body.VarData?.VarDataBody?.HasTextData) {
        suffix = '-T';
      } else if (body.Format?.Value?.TimeFmt) {
        suffix = '-D';
      }
      return `Column_${VarId.VarId}${suffix}`;
    };

    return Data.Columns
      // drop invalid columns (no body) and columns whose format key is 0
      .filter(({ WorksheetVarBody: body }) => body && body.Format.Key !== 0)
      .map(column => {
        const columnName = column.WorksheetVarBody.Name;
        return columnName?.length > 0 ? columnName : fallbackName(column);
      });
  } catch (e) {
    console.error(e);
  }
};

// define the set of supported model types. if an uploaded file (.grv, .mpx)
// is not among these, it will be described as "unsupported".
// Keys are model type identifiers, values are display labels.
// extractMSSProjectModels marks a project model as supported only when its
// `model_type` is a key of this map.
export const modelTypeMap = {
  // MSS Model Types
  classification_tree         : 'CART® Classification',
  regression_tree             : 'CART® Regression',
  classification_treenet      : 'TreeNet® Classification',
  regression_treenet          : 'TreeNet® Regression',
  classification_random_forest: 'Random Forests® Classification',
  regression_random_forest    : 'Random Forests® Regression',
  classification_multi_model  : 'Discover Best Model (Binary Response)',
  regression_multi_model      : 'Discover Best Model (Continuous Response)',
  binary_logistic_regression  : 'Binary Logistic Regression',
  poisson_regression          : 'Poisson Regression',
  multiple_regression         : 'Multiple Regression',
  mars_regression             : 'MARS® Regression',
  // SPM  Model Types
  classification_mars         : 'MARS® Classification',
  regression_mars             : 'MARS® Regression',
  classification_gps          : 'GPS/Generalized Lasso Classification',
  regression_gps              : 'GPS/Generalized Lasso Regression',
  logit                       : 'Logistic Regression',
  regression                  : 'Linear Regression'
};
