
import ASCIIFolder from "fold-to-ascii";

// Matches any single punctuation character, anywhere in a string: the ASCII
// punctuation marks written in the class plus the inverted marks and the
// em/en dashes. Notes on reading the class:
//   - `,-.` is a character range, which covers exactly `,`, `-` and `.`.
//   - `\]` is an escaped `]`; the backslash character itself is not matched.
const punctuationRegex = /[!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~¡¿—–]/g;
// A double hyphen with at least one whitespace character on each side.
const standaloneEmDashRegex = /\s+--\s+/g;
// The triple-hyphen marker that joinStandaloneEmDashes writes and
// restoreEmDashes turns back into ' --'.
const joinedEmDashRegex = /---/g;
// TODO: make the punctuation sets here fully equivalent to the punctuation in jw_script_processor.py
// The four patterns below share one character class. Compared with
// punctuationRegex it leaves out `#`, `$`, `%`, `&`, `*` and `@`.
// A run of punctuation with whitespace on both sides (the run is group 1).
const standalonePunctuationRegex = /\s+([!"'()+,-./:;<=>?[\]^_`{|}~¡¿—–]+)\s+/g;
// Same as above, but the trailing whitespace must reach the end of the string.
// Note that at least one trailing whitespace character is required.
const trailingStandalonePunctuationRegex = /\s+([!"'()+,-./:;<=>?[\]^_`{|}~¡¿—–]+)\s+$/g;
// A run of punctuation at the end of the string, plus any whitespace after it.
const trailingPunctuationRegex = /([!"'()+,-./:;<=>?[\]^_`{|}~¡¿—–]+)\s*$/g;
// A run of punctuation at the start of the string, plus any whitespace after it.
const leadingPunctuationRegex = /^([!"'()+,-./:;<=>?[\]^_`{|}~¡¿—–]+)\s*/g;

// Any run of one or more whitespace characters.
const whitespaceRegex = /\s+/g;
// Whitespace at the very start of the string.
const startingWhitespaceRegex = /^\s+/g;
// Whitespace at the very end of the string.
const trailingWhitespaceRegex = /\s+$/g;


/**
 * Glues every whitespace-surrounded `--` onto the text before it.
 *
 * Each match (whitespace, `--`, whitespace) becomes the marker `---` followed
 * by a single space, e.g. `foo -- bar` becomes `foo--- bar`. The marker can be
 * converted back with restoreEmDashes.
 *
 * @param {string|null|undefined} str - Text to process.
 * @returns {string|null|undefined} The rewritten text; falsy input is
 *   returned unchanged.
 */
export function joinStandaloneEmDashes(str) {
  // TODO: replacing with a space means hyphenated words may count as two
  // words -- decide what the correct behaviour is.
  return str ? str.replace(standaloneEmDashRegex, '--- ') : str;
}

/**
 * Turns every `---` marker back into a space followed by `--`.
 *
 * For example `foo---` becomes `foo --`.
 *
 * @param {string|null|undefined} str - Text that may contain `---` markers.
 * @returns {string|null|undefined} The text with markers expanded; falsy
 *   input is returned unchanged.
 */
export function restoreEmDashes(str) {
  if (str) {
    return str.replace(joinedEmDashRegex, ' --');
  }
  return str;
}

/**
 * Attaches whitespace-surrounded punctuation runs to the text before them.
 *
 * Two passes are made:
 *   1. every punctuation run with whitespace on both sides is replaced by the
 *      run followed by one space (`foo , bar` becomes `foo, bar`);
 *   2. a punctuation run that is preceded by whitespace and followed by
 *      whitespace up to the end of the string is replaced by the run alone.
 * A run at the very end with no whitespace after it (`foo .`) matches neither
 * pass and is left as is.
 *
 * TODO: not sure line feeds are handled correctly; check the JS regex docs and test.
 * TODO: standalone quotes (" and ') need different handling: an opening quote
 *   should join the following text and a closing quote the preceding text.
 * TODO: alternatively, raise an alarm/message when an edit containing
 *   standalone quotes is saved, to keep the implementation simple.
 *
 * @param {string|null|undefined} str - Text to process.
 * @returns {string|null|undefined} The rewritten text; falsy input is
 *   returned unchanged.
 */
export function joinStandalonePunctuation(str) {
  if (!str) {
    return str;
  }
  const innerJoined = str.replace(standalonePunctuationRegex, '$1 ');
  return innerJoined.replace(trailingStandalonePunctuationRegex, '$1');
}

/**
 * Collapses every whitespace run into one space and removes whitespace from
 * both ends of the text.
 *
 * @param {string|null|undefined} str - Text to normalize.
 * @returns {string|null|undefined} The normalized text (an empty string when
 *   the input was whitespace only); falsy input is returned unchanged.
 */
export function normalizeWhiteSpace(str) {
  if (!str) {
    return str;
  }
  const collapsed = str.replace(whitespaceRegex, ' ');
  // Whitespace-only input collapses to a single space: nothing is left.
  if (collapsed === ' ') {
    return '';
  }
  return collapsed
    .replace(startingWhitespaceRegex, '')
    .replace(trailingWhitespaceRegex, '');
}

/**
 * Runs both punctuation-joining steps on the text: standalone `--` first,
 * then the remaining standalone punctuation.
 *
 * The order matters because `-` is itself in the standalone punctuation set;
 * handling `--` first lets it receive the `---` marker instead.
 *
 * @param {string|null|undefined} str - Text to process.
 * @returns {string|null|undefined} The result of the two steps applied in order.
 */
export function normalizePunctuation(str) {
  return joinStandalonePunctuation(joinStandaloneEmDashes(str));
}

/**
 * Normalizes transcript text: punctuation is normalized first, then
 * whitespace.
 *
 * @param {string|null|undefined} str - Transcript text.
 * @returns {string|null|undefined} The text after normalizePunctuation
 *   followed by normalizeWhiteSpace.
 */
export function normalizeTranscriptText(str) {
  const punctuationNormalized = normalizePunctuation(str);
  const fullyNormalized = normalizeWhiteSpace(punctuationNormalized);
  return fullyNormalized;
}

/**
 * Placeholder for transcript validation. The body is currently empty, so the
 * argument is ignored and the call always returns `undefined`.
 *
 * @param {*} str - Presumably the transcript text to validate; currently unused.
 */
export function validateTranscriptText(str) {
  // TODO check for standalone quotes or other things not dealt with by automatic normalization
}

/**
 * Splits text into pieces at every single space character.
 *
 * Consecutive spaces produce empty-string entries and an empty input yields
 * `['']`.
 *
 * @param {string} str - Text to split.
 * @returns {string[]} The pieces between spaces.
 */
function getWords(str) {
  const separator = ' ';
  const pieces = str.split(separator);
  return pieces;
}

/**
 * Splits transcript text into an array of words.
 *
 * The text is normalized with normalizeTranscriptText, split with getWords,
 * and each word is passed through restoreEmDashes. A word that carried a
 * `---` marker therefore comes back containing a space (e.g. `foo --`).
 *
 * @param {string|null|undefined} str - Transcript text.
 * @returns {string[]} The words, or an empty array when the input is falsy or
 *   normalizes to an empty string.
 */
export function getTranscriptWordsFromString(str) {
  const normalized = str ? normalizeTranscriptText(str) : '';
  if (!normalized) {
    return [];
  }
  return getWords(normalized).map((token) => restoreEmDashes(token));
}

/**
 * Removes a run of punctuation at the end of the text, together with any
 * whitespace that follows it.
 *
 * Text that ends in whitespace with no punctuation before it is left as is.
 *
 * @param {string|null|undefined} str - Text to strip.
 * @returns {string|null|undefined} The stripped text; falsy input is
 *   returned unchanged.
 */
export function stripTrailingPunctuation(str) {
  return str ? str.replace(trailingPunctuationRegex, '') : str;
}

/**
 * Removes a run of punctuation at the start of the text, together with any
 * whitespace that follows it.
 *
 * Text that starts with whitespace is left as is, because the punctuation run
 * must begin at the first character.
 *
 * @param {string|null|undefined} str - Text to strip.
 * @returns {string|null|undefined} The stripped text; falsy input is
 *   returned unchanged.
 */
export function stripLeadingPunctuation(str) {
  if (str) {
    return str.replace(leadingPunctuationRegex, '');
  }
  return str;
}

/**
 * Strips punctuation from both ends of the text: the trailing run is removed
 * first, then the leading run.
 *
 * @param {string|null|undefined} str - Text to trim.
 * @returns {string|null|undefined} The text after stripTrailingPunctuation
 *   followed by stripLeadingPunctuation.
 */
export function trimPunctuation(str) {
  return stripLeadingPunctuation(stripTrailingPunctuation(str));
}

/**
 * Reduces a word to a bare comparison key: punctuation removed, non-ASCII
 * characters folded to ASCII (or dropped), and everything lower-cased.
 *
 * Punctuation is stripped twice. The first pass works on the raw word, as
 * before. The second pass runs after folding, because folding can itself emit
 * ASCII punctuation (for example a typographic apostrophe folds to `'`);
 * without it "Don’t" and "Don't" would produce different keys.
 *
 * @param {string} word - The word to normalize.
 * @returns {string} The normalized, lower-case key.
 */
export function strongNormalizeWord(word) {
  const stripped = word.replace(punctuationRegex, '');
  const folded = ASCIIFolder.foldReplacing(stripped);
  // Remove any punctuation the fold introduced.
  return folded.replace(punctuationRegex, '').toLowerCase();
}

/**
 * Applies strongNormalizeWord to every entry of an array.
 *
 * @param {string[]} words - Words to normalize.
 * @returns {string[]} A new array with the normalized words, in the same order.
 */
export function strongNormalizeWordArray(words) {
  const normalizeOne = (rawWord) => strongNormalizeWord(rawWord);
  return words.map(normalizeOne);
}
