/*
 * File: text-viewer.utils.ts
 * Project: aiscaler-web
 * File Created: Tuesday, 26th October 2021 10:55:07 am
 * Author: v.anhphamd (v.anhphd@vinbrain.net)
 *
 * Copyright 2021 VinBrain JSC
 */

import { Collection } from "domain/common";
import { NERAnnotation, Token } from "domain/text-labeling";
import { Label } from "domain/text-labeling";
import { Line, SentenceModel } from "../../models/text-viewer.models";

/**
 * Indexes a list of labels by their id.
 *
 * @param labels Labels to index.
 * @returns `labelIds` holds every label id in input order (repeated ids are
 *   kept), `labelEntities` maps an id to its label; when an id occurs more
 *   than once the later label replaces the earlier one.
 */
export function buildLabels(labels: Label[]) {
  const labelIds: string[] = [];
  const labelEntities: Record<string, Label> = {};

  for (let position = 0; position < labels.length; position++) {
    const current = labels[position];
    labelEntities[current.id] = current;
    labelIds.push(current.id);
  }

  return { labelIds, labelEntities };
}

/**
 * Tokenizes every sentence and merges the results into one flat token index.
 *
 * Each sentence is tokenized with `buildSentenceTokens`; the running token
 * count is passed as the starting token index so `index` keeps increasing
 * across sentences.
 *
 * @param sentences Sentences to tokenize, processed in the given order.
 * @param tokenizer Regular-expression source forwarded to `buildSentenceTokens`.
 * @returns `tokenIds` (all token ids in order), `tokenEntities` (id -> token)
 *   and `sentenceTokenIds` (sentenceIndex -> token ids of that sentence).
 */
export function buildTokens(sentences: SentenceModel[], tokenizer: string) {
  const tokenEntities: Record<string, Token> = {};
  const tokenIds: string[] = [];
  const sentenceTokenIds: Record<number, string[]> = {};

  for (const sentence of sentences) {
    const built = buildSentenceTokens(sentence, tokenIds.length, tokenizer);

    // Append in place: re-creating the accumulated array/object for every
    // sentence (concat + object spread) copies all previous tokens each time,
    // which is quadratic in the total number of tokens.
    for (const tokenId of built.tokenIds) {
      tokenIds.push(tokenId);
      tokenEntities[tokenId] = built.tokenEntities[tokenId];
    }
    sentenceTokenIds[sentence.sentenceIndex] = built.tokenIds;
  }

  return {
    sentenceTokenIds,
    tokenIds,
    tokenEntities,
  };
}

/**
 * Splits one sentence into tokens and assigns each token a character range.
 *
 * The sentence text is split with `new RegExp(tokenizer, "g")`; empty or
 * undefined pieces are skipped. Character offsets start at
 * `sentence.startIndex` and advance only by the length of each emitted piece.
 * NOTE(review): the offsets therefore match the original text only if the
 * split keeps the separators as pieces (e.g. the pattern uses a capture group
 * or lookarounds) — confirm against the tokenizer patterns actually used.
 *
 * @param sentence Sentence to tokenize (`sentence`, `startIndex` and
 *   `sentenceIndex` are read).
 * @param fromTokenIndex Value of `index` for the first token; later tokens
 *   count up from it. Defaults to 0.
 * @param tokenizer Regular-expression source used to split the sentence text.
 * @returns The input `sentence`, the token ids in order (`"start:end"`, with
 *   an inclusive end offset) and a map from token id to token.
 */
export function buildSentenceTokens(
  sentence: SentenceModel,
  fromTokenIndex: number = 0,
  tokenizer: string
) {
  const tokenEntities: Record<string, Token> = {};
  const tokenIds = [];
  const pieces = sentence.sentence.split(new RegExp(tokenizer, "g"));

  let nextTokenIndex = fromTokenIndex;
  let cursor = sentence.startIndex;

  for (const piece of pieces) {
    // split() can yield "" (adjacent separators) or undefined (capture group
    // that did not take part in the match); neither is a token.
    if (!piece) continue;

    const startIndex = cursor;
    const endIndex = startIndex + piece.length - 1;
    const token = {
      id: `${startIndex}:${endIndex}`,
      index: nextTokenIndex,
      sentenceIndex: sentence.sentenceIndex,
      text: piece,
      startIndex,
      endIndex,
    };

    tokenIds.push(token.id);
    tokenEntities[token.id] = token;

    cursor += piece.length;
    nextTokenIndex += 1;
  }

  return {
    sentence,
    tokenIds,
    tokenEntities,
  };
}

/**
 * Walks forward from `startTokenId` to the first token whose `endIndex`
 * reaches `endIndex`.
 *
 * Despite the name, `startTokenId` and the result are positions in
 * `tokenIds`, not token id strings.
 *
 * @param tokenIds Token ids in order.
 * @param tokenEntities Map from token id to token.
 * @param startTokenId Position in `tokenIds` to start scanning from.
 * @param endIndex Character offset the returned token must reach.
 * @returns Position of the first token at or after `startTokenId` with
 *   `token.endIndex >= endIndex`. If no such token exists the position of the
 *   last token is returned; if `startTokenId` is already at or past the last
 *   position it is returned unchanged.
 */
export function findEndTokenId(
  tokenIds: string[],
  tokenEntities: Record<string, Token>,
  startTokenId: number,
  endIndex: number
): number {
  const lastTokenId = tokenIds.length - 1;
  let endTokenId = startTokenId;
  // The bounds check must come first: evaluating the token lookup at a
  // position past the end of `tokenIds` dereferences `undefined` and throws.
  while (
    endTokenId < lastTokenId &&
    tokenEntities[tokenIds[endTokenId]].endIndex < endIndex
  ) {
    endTokenId++;
  }
  return endTokenId;
}
/**
 * Binary-searches `tokenIds` for the token that covers the character offset
 * `startIndex`.
 *
 * Despite the name, the result is a position in `tokenIds`, not a token id
 * string. The search assumes the tokens are ordered by `startIndex` and do
 * not overlap.
 *
 * @param tokenIds Token ids in order.
 * @param tokenEntities Map from token id to token.
 * @param startIndex Character offset to locate.
 * @param left First position of the search range (inclusive). Defaults to 0.
 * @param right Last position of the search range (inclusive). Defaults to the
 *   last position of `tokenIds`.
 * @returns Position of the token with
 *   `token.startIndex <= startIndex <= token.endIndex`. When no token in the
 *   range covers the offset, the position the search narrows down to is
 *   returned. Returns -1 when `tokenIds` is empty.
 */
export function findStartTokenId(
  tokenIds: string[],
  tokenEntities: Record<string, Token>,
  startIndex: number,
  left = 0,
  right = tokenIds.length - 1
): number {
  // Nothing to search; avoids dereferencing a missing token below.
  if (tokenIds.length === 0) return -1;

  // The end bound is inclusive: an offset equal to a token's endIndex belongs
  // to that token. Treating it as exclusive left such offsets unmatched by
  // every branch and made the search loop forever on a two-element range.
  const coversPastStart = (position: number): boolean => {
    const token = tokenEntities[tokenIds[position]];
    return token.startIndex < startIndex && startIndex <= token.endIndex;
  };

  let low = left;
  let high = right;
  while (low < high) {
    if (coversPastStart(low)) return low;
    if (coversPastStart(high)) return high;

    const mid = Math.floor((low + high) / 2);
    const midToken = tokenEntities[tokenIds[mid]];
    if (midToken.startIndex > startIndex) {
      // Target lies before mid; mid < high here, so the range shrinks.
      high = mid;
    } else if (midToken.startIndex === startIndex) {
      return mid;
    } else if (midToken.endIndex < startIndex) {
      low = mid + 1;
    } else {
      // midToken.startIndex < startIndex <= midToken.endIndex
      return mid;
    }
  }
  return low;
}

/**
 * Picks the line with the greatest horizontal extent (`end.x - start.x`).
 *
 * When several lines share the greatest extent, the one that appears first in
 * `lines` is returned.
 *
 * @param lines Candidate lines.
 * @returns The widest line. NOTE: for an empty `lines` array the value is
 *   `null` at runtime even though the declared return type is `Line`.
 */
export function findLargestLine(lines: Line[]): Line {
  const horizontalExtent = (line: Line): number => line.end.x - line.start.x;

  const widest = lines.reduce<Line | null>((best, candidate) => {
    if (best === null) return candidate;
    // Strictly greater, so an earlier line wins a tie.
    return horizontalExtent(candidate) > horizontalExtent(best)
      ? candidate
      : best;
  }, null);

  return widest as Line;
}

/**
 * Collects the ids of all tokens that lie entirely inside an annotation's
 * character range.
 *
 * @param annotation Annotation whose `startIndex` / `endIndex` bound the range.
 * @param tokenCollection Tokens to test; `allIds` is iterated in order and
 *   each id is looked up in `entities`.
 * @returns Ids of the tokens that neither start before the annotation's
 *   `startIndex` nor end after its `endIndex`, in `allIds` order.
 */
export function buildAnnotationTokenIds(
  annotation: NERAnnotation,
  tokenCollection: Collection<Token>
) {
  const rangeStart = annotation.startIndex;
  const rangeEnd = annotation.endIndex;
  const coveredIds: string[] = [];

  for (const candidateId of tokenCollection.allIds) {
    const candidate = tokenCollection.entities[candidateId];
    const sticksOut =
      candidate.startIndex < rangeStart || candidate.endIndex > rangeEnd;
    if (!sticksOut) {
      coveredIds.push(candidateId as string);
    }
  }

  return coveredIds;
}
