let SRE = require("speech-rule-engine");

/**
 * Lower-case tag names of the elements treated as block-level content.
 * isBlockElement() tests membership in this list; entries are grouped by purpose below
 * but the overall order is alphabetical except for the table parts.
 * @type {Array<string>}
 */
export const BLOCK_ELEMENTS = [
  // Sectioning, grouping and flow containers
  "address", "article", "aside", "blockquote", "details", "dialog",
  // Description lists and generic containers
  "dd", "div", "dl", "dt",
  // Forms and figures
  "fieldset", "figcaption", "figure", "footer", "form",
  // Headings
  "h1", "h2", "h3", "h4", "h5", "h6",
  // Page structure
  "header", "hgroup", "hr", "li", "main", "nav", "ol", "p", "pre", "section",
  // Tables
  "table", "tr", "th", "td",
  // Unordered lists
  "ul",
];

/**
 * Abbreviations, keyed by language code, whose trailing period must not be treated as the
 * end of a sentence. Every entry includes its trailing period: the abbreviation matcher is
 * anchored at the end of an utterance that has just had a terminator appended, so an entry
 * without the period could never match.
 * @type {Object<string, Array<string>>}
 */
const ABBREVIATIONS = {
  en: [
    "A.M.",
    "Av.",
    "Ave.",
    "Blvd.",
    "Capt.",
    "Cmdr.",
    "Col.",
    "Cpl.",
    "Dr.",
    "Fr.",
    "Gen.",
    "Hon.",
    "Jr.",
    "Lt.",
    "Mr.",
    "Mrs.",
    "Ms.",
    "No.",
    "Ofc.",
    "P.M.",
    "Pl.",
    "Prof.",
    "Pvt.",
    "Rd.",
    "Rep.",
    "Rev.",
    "Rt.",
    "Rte.",
    "Sen.",
    "Sgt.",
    "Sr.",
    "St.",
    "Ste.",
  ],
};

/**
 * Represents a Range of the document being processed and the character offset of its textual
 * content within the plain text utterance.
 */
export class Fragment {
  /**
   * @param {Range} range Range of a word or sentence.
   * @param {number} offset Character offset from the beginning of the plaintext utterance.
   * @param {String} [text] optional; if provided (and non-empty), this text is used as the
   *                        Fragment text rather than the Range's textual content.
   */
  constructor(range, offset, text = null) {
    /** @type {Range} DOM Range */
    this.range = range;
    /** @type {number} 0-based character offset from the beginning of the plaintext utterance. */
    this.offset = offset;
    /** @type {string} Text spoken for this fragment; defaults to the Range's text content. */
    this.text = text || this.range.toString();
  }

  /**
   * True if the given character offset is contained within the fragment, i.e. it falls in the
   * half-open interval [this.offset, this.offset + this.text.length).
   * @param {number} offset Character offset within the plain text utterance
   * @returns {boolean}
   */
  contains(offset) {
    // The exclusive upper bound is offset + length. Subtracting one more would drop the
    // fragment's last character and make single-character fragments unmatchable.
    return offset >= this.offset && offset < this.offset + this.text.length;
  }

  /**
   * True if the fragment's start index is within charIndex to (charIndex + charLength - 1)
   * within the plaintext utterance.
   * @param {number} charIndex start index within the utterance
   * @param {number} charLength length of word being spoken within the utterance
   * @returns {boolean}
   */
  startsWithin(charIndex, charLength) {
    return this.offset >= charIndex && this.offset < charIndex + charLength;
  }

  /**
   * Returns a range spanning two fragments.
   * @param {Fragment} fragment1 Start fragment
   * @param {Fragment} fragment2 End fragment
   * @returns {Range} DOM range spanning start of fragment1 and end of fragment2. When both
   *                  arguments are the same object, that fragment's own range is returned.
   */
  static spannedRange(fragment1, fragment2) {
    if (fragment1 === fragment2) {
      // No need to create a new range if both args are the same object
      return fragment1.range;
    }
    const doc = fragment1.range.startContainer.ownerDocument;
    const spanned = doc.createRange();
    spanned.setStart(fragment1.range.startContainer, fragment1.range.startOffset);
    spanned.setEnd(fragment2.range.endContainer, fragment2.range.endOffset);
    return spanned;
  }
}

/**
 * A sentence to be spoken: the DOM Range it covers, the plain text utterance handed to the
 * speech synthesizer, and the word fragments used to map spoken positions back to the DOM.
 */
export class TtsSentence {
  constructor() {
    /** @type {Range} Range covered by the sentence; created lazily by initRange. */
    this.range = null;

    /** @type {String} Plain text to be spoken. */
    this.utterance = "";

    /** @type {Array<Fragment>} Array of Fragment objects typically representing words. */
    this.words = [];
  }

  /**
   * Ensures this.range exists and belongs to the same document as the given node.
   * @param {Node} node Node about to be used as a range boundary.
   * @throws {Error} If the existing range's boundaries belong to a different document.
   */
  initRange(node) {
    const nodeDocument = node.ownerDocument;
    if (this.range === null) {
      this.range = nodeDocument.createRange();
      return;
    }

    const { startContainer, endContainer } = this.range;
    const sameDocument =
      startContainer.ownerDocument === nodeDocument &&
      endContainer.ownerDocument === nodeDocument;
    if (!sameDocument) {
      throw new Error(
        "Cannot set start and end nodes from different documents."
      );
    }
  }

  // The boundary setters below mirror the DOM Range methods of the same name, creating and
  // validating the sentence range first.

  /** @param {Node} node  @param {number} offset */
  setStart(node, offset) {
    this.initRange(node);
    this.range.setStart(node, offset);
  }

  /** @param {Node} node  @param {number} offset */
  setEnd(node, offset) {
    this.initRange(node);
    this.range.setEnd(node, offset);
  }

  /** @param {Node} node */
  setStartAfter(node) {
    this.initRange(node);
    this.range.setStartAfter(node);
  }

  /** @param {Node} node */
  setEndAfter(node) {
    this.initRange(node);
    this.range.setEndAfter(node);
  }

  /** @param {Node} node */
  setStartBefore(node) {
    this.initRange(node);
    this.range.setStartBefore(node);
  }

  /** @param {Node} node */
  setEndBefore(node) {
    this.initRange(node);
    this.range.setEndBefore(node);
  }

  /**
   * Appends a word fragment positioned at the current end of the utterance and adds its text
   * to the utterance.
   * @param {Range} range Range to be treated as a word.
   * @param {String} [text] If provided, this overrides the text content of the range.
   */
  addWord(range, text = null) {
    const fragment = new Fragment(range, this.utterance.length, text);
    this.words.push(fragment);
    this.utterance += fragment.text;
  }

  /**
   * @param {String} text Text to add to sentence utterance that does not correspond to members
   *                      of the words array (e.g., image captions, list and table structure
   *                      announcements, etc.)
   */
  extendUtterance(text) {
    this.utterance += text;
  }

  /**
   * Returns the index of the words array that matches the word at the specified offset in the
   * plaintext utterance. A starting index may be provided in the fromIndex argument to begin
   * searching from a midpoint in the array.
   * @deprecated wordsIn is more flexible, and can return multiple fragments if needed.
   * @param {number} offset Character offset in the plaintext utterance
   * @param {number} [fromIndex] The index to start the search at
   * @returns {number} Index of the matching word; the index before the first word that starts
   *                   past the offset; or -1 if the search runs off the end of the array.
   */
  wordAt(offset, fromIndex = 0) {
    let i = Math.max(fromIndex, 0);
    while (i < this.words.length) {
      const word = this.words[i];
      if (word.contains(offset)) {
        return i;
      }
      if (offset < word.offset) {
        // Already past the offset: fall back to the previous word rather than seeking ahead
        return i - 1;
      }
      i += 1;
    }
    return -1;
  }

  /**
   * Finds the indexes of the words array whose initial char indexes are spanned by the spoken
   * portion of the plaintext utterance. A starting index may be provided in the fromIndex argument
   * to begin searching from a midpoint in the array.
   * @param {number} charIndex The character index in the utterance of the word(s) being spoken
   * @param {number} charLength The number of characters being spoken; may be zero, in which case
   *                            matching will be based solely on whether charIndex appears within
   *                            a word fragment.
   * @param {number} [fromIndex] The index to start the search at
   * @returns {Array<number>} Indexes of words overlapping with charIndex to charIndex + charLength
   */
  wordsIn(charIndex, charLength, fromIndex = 0) {
    // Safari synths don't provide charLength on boundary events. With a length we can use the
    // more precise startsWithin; without one only the fuzzier contains is available.
    const lengthKnown = charLength !== undefined;
    const matches = [];

    for (let i = Math.max(fromIndex, 0); i < this.words.length; i++) {
      const word = this.words[i];

      const isSpoken =
        (lengthKnown && word.startsWithin(charIndex, charLength)) ||
        word.contains(charIndex);
      if (isSpoken) {
        matches.push(i);
        continue;
      }

      const startsAfterSpokenText =
        (lengthKnown && charIndex + charLength < word.offset) ||
        charIndex < word.offset;
      if (startsAfterSpokenText) {
        break;
      }
    }

    return matches;
  }
}

/**
 * Extracts sentences from a DOM Node.
 *
 * Walks the node's subtree in document order, accumulating words (as DOM Ranges) into
 * TtsSentence objects. A sentence ends at sentence-terminating punctuation (unless that
 * punctuation completes a known abbreviation) and at the start and end of block elements.
 * Structural announcements for tables and lists, and optionally images and page breaks, are
 * emitted as sentences of their own or as extra words.
 *
 * NOTE(review): a sentence still in progress when traversal finishes (for example when the
 * root node is not a block element and its text has no terminator) is not pushed into the
 * result — confirm whether callers rely on this.
 *
 * @param {Node} node DOM Node to extract sentences from.
 * @param {Object} [options] Optional options object to customize extraction behavior.
 * @param {boolean} [options.voiceImageDescriptions=false] Announce images using their alt text.
 * @param {boolean} [options.voicePageNumbers=false] Announce span/div page breaks
 *        (epub:type="pagebreak" or role="doc-pagebreak").
 * @param {string} [options.language="en"] Key of the abbreviation list to use; languages
 *        without a list fall back to the English one.
 * @returns {Array<TtsSentence>}
 */
export function extractSentences(node, options = {}) {
  const BOUNDARY_CHARS = /[.,](?=\s|$)|[!?\s";:\u201c\u201d\u2013\u2014]/;
  const SENTENCE_TERMINATORS = /[.?!]/;

  // Define book-keeping variables

  /** @type {Document} Document that the Node belongs to. */
  const doc = node.ownerDocument;

  /** @type {TtsSentence} Represents the sentence currently being constructed. */
  let sentenceInProgress = null;

  /** @type {Range} Range representing the word currently being constructed. */
  let wordInProgress = null;

  /** @type {boolean} Voice image descriptions. */
  const voiceImageDescriptions = options.voiceImageDescriptions || false;

  /** @type {boolean} Voice page numbers (epub:type='pagebreak'). */
  const voicePageNumbers = options.voicePageNumbers || false;

  // Fall back to the English list instead of failing on a language with no list of its own.
  const language = options.language || "en";
  const abbreviations = Object.prototype.hasOwnProperty.call(
    ABBREVIATIONS,
    language
  )
    ? ABBREVIATIONS[language]
    : ABBREVIATIONS.en;

  // Every period must be escaped: a string pattern passed to replace() only replaces the
  // first occurrence, which would leave wildcard dots in entries such as "A.M.".
  const abbreviationPattern = abbreviations
    .map((a) => a.replace(/\./g, "\\."))
    .join("|");

  /** @type {RegExp} Matches an utterance that ends with a known abbreviation. */
  const abbreviationMatcher = new RegExp(
    `(^|\\s)(${abbreviationPattern})$`,
    "i"
  );

  const result = [];
  process(node);
  return result;

  /**
   * Dispatches a node to the element or text handler; other node types are ignored.
   * @param {Node} node
   */
  function process(node) {
    if (node.nodeType === Node.ELEMENT_NODE) {
      processElement(node);
    } else if (node.nodeType === Node.TEXT_NODE) {
      processText(node);
    }
  }

  /**
   * Processes an element and its children, breaking sentences around block elements.
   * Elements with the hidden property set are skipped entirely.
   * @param {Element} element
   */
  function processElement(element) {
    if (element.hidden) {
      return;
    }

    const childNodes = element.childNodes;

    // Block elements are implicitly sentence starts
    if (isBlockElement(element)) {
      breakSentenceAtElement(element);

      // Block elements may have an announcement.
      blockStartAnnouncement(element);

      // Start a new sentence
      sentenceInProgress = new TtsSentence();
      sentenceInProgress.setStart(element, 0);
    }

    specialElementAnnouncement(element);

    // Math containers are voiced as a whole by processMath, so their children are skipped.
    if (element.tagName.toLowerCase() !== "mjx-container") {
      childNodes.forEach(process);
    }

    // Block elements are implicitly sentence ends
    if (isBlockElement(element)) {
      breakSentenceAfterElement(element);
      sentenceInProgress = null;

      // Block elements may have an end announcement
      blockEndAnnouncement(element);
    }
  }

  /**
   * Splits a text node into words and sentences. A word left unfinished at the end of the
   * text node stays in progress so that it can continue into following nodes.
   * @param {Node} textNode
   */
  function processText(textNode) {
    const text = textNode.textContent;
    let substrPos = 0;

    while (substrPos < text.length) {
      // If there is no current word in progress, seek to first non-whitespace character and
      // use that as the first character of a new word.
      if (wordInProgress === null) {
        const firstWordCharacter = /(\s*)(\S)/.exec(text.slice(substrPos));
        if (firstWordCharacter !== null) {
          const firstWordCharacterOffset =
            firstWordCharacter.index + firstWordCharacter[1].length;

          wordInProgress = doc.createRange();
          wordInProgress.setStart(
            textNode,
            firstWordCharacterOffset + substrPos
          );

          // If no words have been put into the sentence in
          // progress yet, or one does not yet exist, update
          // its start position to match
          if (sentenceInProgress === null) {
            sentenceInProgress = new TtsSentence();
          }

          if (sentenceInProgress.words.length === 0) {
            sentenceInProgress.setStart(textNode, wordInProgress.startOffset);
          }

          // Collapse any captured whitespace into a single space
          if (
            sentenceInProgress.utterance.length > 0 &&
            firstWordCharacter[1].length > 0
          ) {
            sentenceInProgress.extendUtterance(" ");
          }

          substrPos += firstWordCharacterOffset;
        } else {
          // Only whitespace remains in this text node
          substrPos = text.length;
        }
      }

      // seek ahead to find boundary characters
      if (wordInProgress) {
        const index = text.slice(substrPos).search(BOUNDARY_CHARS);
        if (index > -1) {
          const boundary = text[index + substrPos];
          wordInProgress.setEnd(textNode, index + substrPos);

          if (wordInProgress.toString().length > 0) {
            sentenceInProgress.addWord(wordInProgress);
          }

          // Having processed a boundary, we should append it to the main utterance
          // even if no word was found.
          // Exception for newlines, which sound like full sentence stops.
          wordInProgress = null;
          sentenceInProgress.extendUtterance(
            boundary === "\n" ? " " : boundary
          );

          // test against sentence boundaries
          if (
            SENTENCE_TERMINATORS.test(boundary) &&
            !abbreviationMatcher.test(sentenceInProgress.utterance)
          ) {
            sentenceInProgress.setEnd(textNode, index + substrPos + 1);
            if (sentenceInProgress.utterance.length > 0) {
              result.push(sentenceInProgress);
            }
            sentenceInProgress = null;
          }

          substrPos += index + 1;
        } else {
          // No boundary in the rest of this node; the word stays in progress
          substrPos = text.length;
        }
      }
    }
  }

  /**
   * Adds the spoken form of a math container to the sentence in progress as a single word
   * spanning the container's mjx-math element. Containers that lack either a math element or
   * an mjx-math element are skipped.
   * @param {Element} container An mjx-container element
   */
  function processMath(container) {
    const mathSource = container.getElementsByTagName("math")[0];
    const mathTarget = container.querySelector("mjx-math");
    if (!mathSource || !mathTarget) {
      // Nothing to convert to speech, or nothing to anchor the word's range to.
      return;
    }

    const mathSpeech = SRE.toSpeech(mathSource.innerHTML);
    if (mathSpeech === null) {
      return;
    }

    const range = doc.createRange();
    range.setStartBefore(mathTarget);
    range.setEndAfter(mathTarget);
    if (sentenceInProgress === null) {
      sentenceInProgress = new TtsSentence();
      sentenceInProgress.setStart(range.startContainer, range.startOffset);
    }
    sentenceInProgress.addWord(range, mathSpeech);
  }

  /**
   * Pushes a non-highlighting TtsSentence announcing the beginning of the element into the
   * result array. Elements without a start announcement push nothing.
   * @param {Element} element
   */
  function blockStartAnnouncement(element) {
    // A block announcement gets its own TtsSentence.
    const t = new TtsSentence();
    t.setStartBefore(element);
    t.setEndAfter(element);

    switch (element.tagName.toLowerCase()) {
      case "table":
        t.extendUtterance("Begin table.");
        break;
      case "tr":
        t.extendUtterance("Begin table row.");
        break;
      case "th":
        t.extendUtterance("Table heading.");
        break;
      case "td":
        t.extendUtterance("Table cell.");
        break;
      case "ol": {
        const olCount = countListItems(element);
        t.extendUtterance(`Begin ordered list, ${olCount} items.`);
        break;
      }
      case "ul": {
        const ulCount = countListItems(element);
        t.extendUtterance(`Begin list, ${ulCount} items.`);
        break;
      }
      case "li": {
        const listInfo = locateItemInList(element);
        t.extendUtterance(
          `List item ${listInfo.index + 1} of ${listInfo.length}.`
        );
        break;
      }
      default:
      // No announcement for other block elements
    }

    if (t.utterance.length > 0) {
      result.push(t);
    }
  }

  /**
   * Pushes a non-highlighting TtsSentence announcing the end of the element into the result
   * array. Elements without an end announcement push nothing.
   * @param {Element} element
   */
  function blockEndAnnouncement(element) {
    // A block announcement gets its own TtsSentence.
    const t = new TtsSentence();
    t.setStartBefore(element);
    t.setEndAfter(element);

    switch (element.tagName.toLowerCase()) {
      case "table":
        t.extendUtterance("End table.");
        break;
      case "tr":
        t.extendUtterance("End table row.");
        break;
      case "ol":
        t.extendUtterance("End ordered list.");
        break;
      case "ul":
        t.extendUtterance("End list.");
        break;
      default:
      // No announcement for other block elements
    }

    if (t.utterance.length > 0) {
      result.push(t);
    }
  }

  /**
   * Ends the current word and sentence at the element boundary and pushes the sentence into
   * the result array if it has any utterance text. The caller is responsible for resetting
   * or replacing sentenceInProgress afterwards.
   * @param {Element} element Element boundary to use to break sentence accumulator
   * @param {boolean} [breakBefore] Optional; set to false to break after element.
   */
  function breakSentenceAtElement(element, breakBefore = true) {
    if (sentenceInProgress !== null) {
      if (wordInProgress !== null) {
        if (breakBefore) {
          wordInProgress.setEndBefore(element);
        } else {
          wordInProgress.setEndAfter(element);
        }
        if (wordInProgress.toString().length > 0) {
          sentenceInProgress.addWord(wordInProgress);
          wordInProgress = null;
        }
      }
      if (breakBefore) {
        sentenceInProgress.setEndBefore(element);
      } else {
        sentenceInProgress.setEndAfter(element);
      }
      if (sentenceInProgress.utterance.length > 0) {
        result.push(sentenceInProgress);
      }
    }
  }

  /**
   * Convenience wrapper that breaks the current sentence after the element.
   * @param {Element} element
   */
  function breakSentenceAfterElement(element) {
    return breakSentenceAtElement(element, false);
  }

  /**
   * Handles any special element announcements for inline-type content: images, page breaks
   * and math containers. Announcements are added to the sentence in progress or pushed into
   * the result array; nothing is returned.
   * @param {Element} element
   */
  function specialElementAnnouncement(element) {
    switch (element.tagName.toLowerCase()) {
      case "img":
        // Handle image elements
        if (voiceImageDescriptions) {
          // whitespace padding separates image announcements from adjacent content
          const imageAnnouncement = ` image ${
            element.getAttribute("alt") || ""
          } `;
          const imageRange = doc.createRange();
          imageRange.setStartBefore(element);
          imageRange.setEndAfter(element);

          // tagName keeps its source case in XML/XHTML documents, so compare
          // case-insensitively; the parent may also not be an element at all.
          const parent = element.parentElement;
          const isFigureImage =
            parent !== null && parent.tagName.toLowerCase() === "figure";

          if (isFigureImage) {
            // Images that are children of figure elements are blocks that are their own sentence.
            const figureSentence = new TtsSentence();
            figureSentence.setStartBefore(element);
            figureSentence.addWord(imageRange, imageAnnouncement);
            figureSentence.setEndAfter(element);
            result.push(figureSentence);
          } else if (sentenceInProgress === null) {
            // An inline image outside any sentence becomes a sentence of its own
            sentenceInProgress = new TtsSentence();
            sentenceInProgress.setStartBefore(element);
            sentenceInProgress.addWord(imageRange, imageAnnouncement);
            breakSentenceAfterElement(element);
            sentenceInProgress = null;
          } else {
            // All other images are considered inline
            sentenceInProgress.addWord(imageRange, imageAnnouncement);
          }
        }
        break;
      case "span":
      case "div":
        // Handle spans/DIVs with EPUB role pagebreak
        // These are formatted as (and behave like) block elements
        if (
          voicePageNumbers &&
          (element.getAttribute("epub:type") === "pagebreak" ||
            element.getAttribute("role") === "doc-pagebreak")
        ) {
          // We have a custom announcement, regardless of text content.
          const pagebreakLabel =
            element.getAttribute("title") || element.getAttribute("aria-label");
          const pagebreakAnnouncement = `Pagebreak ${pagebreakLabel ? `page ${pagebreakLabel}` : ''}`;
          const pagebreakRange = doc.createRange();
          pagebreakRange.setStartBefore(element);
          pagebreakRange.setEndAfter(element);

          if (sentenceInProgress !== null) {
            breakSentenceAtElement(element);
          }
          sentenceInProgress = new TtsSentence();
          sentenceInProgress.setStartBefore(element);
          sentenceInProgress.addWord(pagebreakRange, pagebreakAnnouncement);
          breakSentenceAfterElement(element);
          sentenceInProgress = null;
        }
        break;
      case "mjx-container":
        processMath(element);
        break;
      default:
      // No special announcement
    }
  }

  /**
   * Given an <li> element, find its position among the <li> children of its parent and the
   * total number of <li> children.
   * @param {Element} element An <li> element
   * @returns {Object} Object with index property indicating the element's 0-based position
   *                   among its <li> siblings (-1 if not found) and a length property
   *                   indicating the number of items in the list.
   */
  function locateItemInList(element) {
    let index = -1;
    let length = 0;
    const children = element.parentElement.children;
    for (let i = 0; i < children.length; i++) {
      const c = children[i];
      if (c.tagName.toLowerCase() === "li") {
        if (index === -1 && c === element) {
          // Position among list items only, so that non-<li> siblings cannot push the
          // announced item number past the announced total.
          index = length;
        }
        length++;
      }
    }
    return { index: index, length: length };
  }

  /**
   * Counts the number of <li> children of an element.
   * @param {Element} element
   * @returns {Number} Count of <li> children of the given element.
   */
  function countListItems(element) {
    let count = 0;
    const children = element.children;
    for (let i = 0; i < children.length; i++) {
      const c = children[i];
      if (c.tagName.toLowerCase() === "li") {
        count++;
      }
    }
    return count;
  }
}

/**
 * Tests whether an element's tag name, compared in lower case, is listed in BLOCK_ELEMENTS.
 * @param {Element} element
 * @returns {boolean} True if the element is typically formatted using block display.
 */
export function isBlockElement(element) {
  const tagName = element.tagName.toLowerCase();
  return BLOCK_ELEMENTS.includes(tagName);
}
