import {
    TtsSentence, Fragment, extractSentences, isBlockElement
} from "./Benespeak.js";

/**
 * Matches a single "word" character; used to decide whether a piece of text
 * contains anything worth reading aloud.
 *
 * Uses Unicode property escapes (any letter, any number, or underscore) rather
 * than `\w`, because `\w` only covers ASCII `[A-Za-z0-9_]` and would therefore
 * classify text written entirely in a non-Latin script as having no readable
 * content. Every string matched by `\w` is still matched by this pattern.
 */
const WORD_CHAR_MATCHER = /[\p{L}\p{N}_]/u;
/**
 * Text-to-speech player built on the Web Speech API.
 *
 * The player walks a DOM subtree (bounded by a "ceiling" element), turns one
 * readable element at a time into a list of sentences via `extractSentences`,
 * and speaks them one utterance per sentence. Highlighting, viewer positioning
 * and end-of-speech notifications are delegated to caller-supplied callbacks.
 */
class TtsPlayer {

    /** Seek direction: look for the next readable element in document order. */
    static get SEEK_FORWARD() {
        return 0;
    }

    /** Seek direction: look for the previous readable element in document order. */
    static get SEEK_BACKWARD() {
        return 1;
    }

    /**
     * @param {Object} [options]
     * @param {SpeechSynthesisVoice} [options.voice] Voice to speak with. Defaults to null.
     * @param {Number} [options.rate] Speech rate. Defaults to 1.
     * @param {boolean} [options.voiceImageDescriptions] Forwarded to extractSentences. Defaults to true.
     * @param {boolean} [options.voicePageNumbers] Forwarded to extractSentences. Defaults to false.
     * @param {Element} [options.ceiling] Top-most element the player will read. Defaults to document.body.
     * @param {Function} [options.highlightWord] Called with the Range of the word(s) being spoken.
     * @param {Function} [options.clearWordHighlight] Called to remove the word highlight.
     * @param {Function} [options.highlightSentence] Called with the Range of the sentence being spoken.
     * @param {Function} [options.clearSentenceHighlight] Called to remove the sentence highlight.
     * @param {Function} [options.updateViewerPosition] Called with the Range currently being spoken.
     * @param {Function} [options.onSpeechEnd] Called when speech ends (manual stop, link click, rewind past start).
     * @param {Function} [options.onOutOfElements] Called when no readable element remains.
     *                                             Defaults to options.onSpeechEnd.
     * @param {SpeechSynthesis} [options.synth] Speech synthesis controller. Defaults to window.speechSynthesis.
     */
    constructor(options={}) {
        /** @type {SpeechSynthesisVoice} */
        this.voice = options.voice || null;
        this.rate = options.rate || 1;
        // Nullish coalescing, not `||`: an explicit `false` must be honoured.
        // With `||` this option could never be switched off.
        this.voiceImageDescriptions = options.voiceImageDescriptions ?? true;
        this.voicePageNumbers = options.voicePageNumbers || false;

        /** @type {Element} Top-most element that the player will attempt to play. Defaults to document.body. */
        this.ceiling = options.ceiling || document.body;

        const noOp = () => {};

        /** @type {Function} */
        this.highlightWord = options.highlightWord || noOp;

        /** @type {Function} */
        this.clearWordHighlight = options.clearWordHighlight || noOp;

        /** @type {Function} */
        this.highlightSentence = options.highlightSentence || noOp;

        /** @type {Function} */
        this.clearSentenceHighlight = options.clearSentenceHighlight || noOp;

        /** @type {Function} */
        this.updateViewerPosition = options.updateViewerPosition || noOp;

        /** @type {Function} */
        this.onSpeechEnd = options.onSpeechEnd || noOp;

        /** @type {Function} */
        this.onOutOfElements = options.onOutOfElements || options.onSpeechEnd || noOp;

        /** @type {Element} Element whose sentences are currently loaded, or null. */
        this.activeElement = null;

        /** @type {Array<TtsSentence>} Sentences of activeElement; always an array (empty when cleared). */
        this.sentences = [];

        /** @type {Number} Index of the sentence last started, or -1 when none. */
        this.currentSentence = -1;

        /** @type {SpeechSynthesisUtterance} */
        this.utterance = null;

        // shorter ref to speech synthesis global
        this.synth = options.synth || window.speechSynthesis;

        // Internal state tracking; both flags are reset when an utterance starts.
        this.interrupted = false;
        this.stopped = false;
    }


    /**
     * Starts TTS playback from the sentence closest to the click coordinates in a
     * double-clicked target.
     * @param {MouseEvent} event
     */
    playFromMouseEvent(event) {
        // Interrupt any ongoing speech
        this.interrupt();

        // Grab the closest text anchor's parent element and seek from there.
        const selection = event.target.ownerDocument.defaultView.getSelection();

        // We need the anchor node from the selection; if no selection is present, fall back to using the event target.
        const anchorNode = selection.anchorNode || event.target;

        // Here's the magic:
        // * If event.target is a link, end speech immediately.
        // * If event.target is an atomic announceable element, start from there
        // * If anchorNode is a text node, start content search from its parent element
        // * If anchorNode is an element with children, start search from child at selection.anchorOffset
        // * If anchorNode is a childless element, start search from that element

        let node = null;
        if (event.target.tagName === 'A') {
            this.onSpeechEnd();
            return;
        } else if (this._isAnnounceableElement(event.target)) {
            node = event.target;
        } else if (anchorNode.nodeType === Node.TEXT_NODE) {
            node = anchorNode.parentElement;
        } else if (anchorNode.childNodes.length > 0) {
            // anchorOffset may equal childNodes.length (caret after the last child); clamp it.
            const offset = Math.min(selection.anchorOffset, anchorNode.childNodes.length - 1);
            node = anchorNode.childNodes.item(offset);
        } else {
            node = anchorNode;
        }

        // With node in hand, we can now find the closest readable ancestor.
        const targetElement = this._findClosestReadableAncestor(node);

        // Mouse events should not trigger playback on the ceiling element, and there is
        // nothing to play when no readable ancestor exists. Handing either case to play()
        // would regenerate sentence data for the ceiling (reading the whole subtree) or
        // dereference null, so exit cleanly here instead.
        if (targetElement === null || targetElement === this.ceiling) {
            this._clearData();
            this.onOutOfElements();
            return;
        }

        this._generateSentenceData(targetElement);

        // By default, read from the beginning of the sentence set
        let idx = 0;

        // If we have sentence data, and the selection contains a valid range,
        // check if the start of that range falls inside any of the sentence ranges.
        if (this.sentences.length > 0 && selection.rangeCount === 1) {
            const rangeFromSelection = selection.getRangeAt(0);
            const hit = this.sentences.findIndex((sentence) => {
                const r = sentence.range;
                // selection start >= sentence start AND selection start <= sentence end
                return rangeFromSelection.compareBoundaryPoints(Range.START_TO_START, r) >= 0 &&
                    rangeFromSelection.compareBoundaryPoints(Range.END_TO_START, r) <= 0;
            });
            if (hit !== -1) {
                idx = hit;
            }
        }

        // Delegate to play method; if no sentences were found for targetElement,
        // the play method will attempt to advance to the next readable element,
        // and exits cleanly when no more elements can be consumed.
        this.play(targetElement, idx);
    }

    /**
     * Starts TTS playback from the closest readable Element ancestor of the given DOM node.
     * @param {Node} node
     */
    playFromNode(node) {
        // Interrupt any ongoing speech
        this.interrupt();

        // find closest element parent
        this.play(this._findClosestReadableAncestor(node));
    }

    /**
     * Starts TTS playback from the specified element. If the element yields no sentences,
     * playback moves on to the next readable element; onOutOfElements is invoked when
     * none remains.
     * @param {Element} element Element to read.
     * @param {Number} [index] Index of sentence to start reading from.
     */
    play(element, index = 0) {
        this._generateSentenceData(element);

        if (this.sentences.length > 0) {
            this._speakUtterance(index);
            return;
        }

        // No sentences found, try seeking again.
        const nextReadableElement = this._seekToNextReadableElement(element);
        if (nextReadableElement) {
            this.play(nextReadableElement);
        } else {
            this.onOutOfElements();
        }
    }

    /**
     * Resets TTS sentence and position data. This differs from stop() in that it *explicitly*
     * resets internal data. stop() only implicitly clears TTS data as a result of triggering
     * an onSpeechEnd event.
     */
    reset() {
        this._clearData();
    }

    /**
     * Generates sentence data from the specified element. Data is only regenerated when
     * the element differs from the currently active one. A null/undefined element clears
     * the active element and leaves an empty sentence list.
     * @param {Element} element
     */
    _generateSentenceData(element) {
        if (element === this.activeElement) {
            return;
        }

        if (element == null) {
            // Nothing to extract from; avoid dereferencing a missing element.
            this.activeElement = null;
            this.sentences = [];
            return;
        }

        console.log(`Generating new data for ${element.tagName}#${element.getAttribute('id')}`);
        this.activeElement = element;
        this.sentences = extractSentences(element, {
            voiceImageDescriptions: this.voiceImageDescriptions,
            voicePageNumbers: this.voicePageNumbers
        });
        console.log(this.sentences);
    }

    /**
     * Speaks the sentence at startIndex. Negative start indexes go from the end of the
     * sentence list (e.g. _speakUtterance(-1) speaks the last sentence).
     * @param {Number} startIndex
     */
    _speakUtterance(startIndex) {
        const idx = (startIndex >= 0) ? startIndex : this.sentences.length + startIndex;
        const s = this.sentences[idx];
        const u = new SpeechSynthesisUtterance(s.utterance);
        u.voice = this.voice;
        u.rate = this.rate;
        u.btIdx = idx;

        // Keep-alive timer: pause and resume speech every 10 seconds so that Google voices
        // do not stop in the middle of speech. It is created only after the utterance has
        // been built, so a bad index cannot leave an orphaned interval behind, and it is
        // cleared by the event handler on 'end'/'error'.
        const keepAlive = setInterval(() => {
            // Skip the cycle while paused: resume() would otherwise undo a pause()
            // the user asked for.
            if (this.voice?.name.startsWith("Google") && !this.synth.paused) {
                this.synth.pause();
                this.synth.resume();
            }
        }, 10000);

        const handler = this.makeTtsEventHandler(s, idx, this, keepAlive);
        u.addEventListener('start', handler);
        u.addEventListener('boundary', handler);
        u.addEventListener('error', handler);
        u.addEventListener('end', handler);
        this.synth.speak(u);
    }

    /** Resumes paused speech. */
    resume() {
        this.synth.resume();
    }

    /** Pauses ongoing speech. */
    pause() {
        this.synth.pause();
    }

    /**
     * Skips to the next sentence, moving on to the next readable element when the
     * active element has no further sentences. Resets the player and invokes
     * onOutOfElements when nothing is left to read.
     */
    ffwd() {
        this.interrupt();
        const newIndex = this.currentSentence + 1;

        if (newIndex < this.sentences.length) {
            console.log(`Skipping to sentence ${newIndex}`);
            this.play(this.activeElement, newIndex);
            return;
        }

        const nextReadableElement = this._seekToNextReadableElement(this.activeElement);
        if (nextReadableElement !== null) {
            this.play(nextReadableElement);
        } else {
            // No more readable nodes
            this.reset();
            this.onOutOfElements();
        }
    }

    /**
     * Skips back to the previous sentence, moving to the last sentence of the previous
     * readable element when already at the first sentence. Invokes onSpeechEnd when
     * there is nothing earlier to read.
     */
    rew() {
        this.interrupt();
        const newIndex = this.currentSentence - 1;

        if (newIndex >= 0) {
            console.log(`Rewinding to sentence ${newIndex}`);
            this.play(this.activeElement, newIndex);
            return;
        }

        const nextReadableElement = this._seekToNextReadableElement(this.activeElement, TtsPlayer.SEEK_BACKWARD);
        if (nextReadableElement !== null) {
            // -1 selects the last sentence of the previous element.
            this.play(nextReadableElement, -1);
        } else {
            // No more readable nodes
            this.onSpeechEnd();
        }
    }

    /**
     * Cancels all utterances in queue and sets the stopped flag. Typically this will clear out
     * stored player state.
     */
    stop() {
        this.stopped = true;
        this.synth.cancel();
    }

    /**
     * Cancels all utterances in queue and sets the interrupted flag. The interrupt flag
     * is used in ffwd/rew to indicate to the player that it should hang on to player state,
     * since we are likely to be seeking within the same active element.
     */
    interrupt() {
        this.interrupted = true;
        this.synth.cancel();
    }

    /**
     * Handler for end-of-utterance event. Since Web Speech doesn't make a distinction between
     * the utterance coming to a natural stop or being manually canceled/interrupted, we need
     * to perform some checks to decide the correct course of action.
     */
    handleSentenceEnd() {
        if (this.stopped) {
            // Manually stopped. Data should be cleared out.
            this._clearData();
            this.onSpeechEnd();
            return;
        }

        if (this.interrupted) {
            // Interrupted (e.g. ffwd/rew): whoever interrupted decides what plays next.
            return;
        }

        // Not stopped or interrupted, which means that the utterance came to a natural stop.
        if (this.currentSentence + 1 < this.sentences.length) {
            // If there are more sentences remaining, play the next one
            this.play(this.activeElement, this.currentSentence + 1);
            return;
        }

        // If we're out of sentences, see if there are any more elements to play.
        const nextReadableElement = this._seekToNextReadableElement(this.activeElement);
        if (nextReadableElement !== null) {
            this.play(nextReadableElement);
        } else {
            // No more readable nodes
            this._clearData();
            this.onOutOfElements();
        }
    }

    /**
     * @param {TtsSentence} s Sentence data
     * @param {Number} i Index of sentence within sentence array
     * @param {TtsPlayer} player TtsPlayer object
     * @param {*} timeout Interval handle (as returned by setInterval) to clear when the utterance ends or errors
     * @returns {Function} Handler function for SpeechSynthesisUtterance events
     */
    makeTtsEventHandler(s, i, player, timeout) {
        return (event) => {
            switch(event.type) {
                case 'start':
                    player.highlightSentence(s.range);
                    player.updateViewerPosition(s.range);
                    player.currentSentence = i;
                    player.interrupted = false;
                    player.stopped = false;
                    break;
                case 'boundary':
                    // Don't do any of this if speech is stopped or interrupted
                    if (!player.stopped && !player.interrupted && event.name === 'word') {
                        const wordIndexes = s.wordsIn(event.charIndex, event.charLength);
                        player.clearWordHighlight();
                        if (wordIndexes.length > 0) {
                            const range = Fragment.spannedRange(
                                s.words[wordIndexes[0]], s.words[wordIndexes[wordIndexes.length - 1]]);
                            player.highlightWord(range);
                            player.updateViewerPosition(range);
                        }
                    }
                    break;
                case 'error':
                case 'end':
                    // Release the keep-alive timer first so that a throwing callback
                    // below cannot leave it running.
                    clearInterval(timeout);
                    player.clearWordHighlight();
                    player.clearSentenceHighlight();
                    player.handleSentenceEnd();
                    break;
            }
        };
    }

    /**
     * Locates the closest Element ancestor of the provided Node that contains readable content.
     * The node itself is returned when it qualifies: it is at the ceiling, it is an atomic
     * block element, or it is an announceable element.
     * @param {Node} fromNode
     * @returns {Element} Closest readable element ancestor, or null if there is none
     */
    _findClosestReadableAncestor(fromNode) {
        let candidate = fromNode;
        while (candidate != null) {
            if (candidate.nodeType === Node.ELEMENT_NODE &&
                (this._isAtCeiling(candidate)
                    || (isBlockElement(candidate) && this._isAtomic(candidate))
                    || this._isAnnounceableElement(candidate)
                )
            ) {
                return candidate;
            }
            candidate = candidate.parentElement;
        }
        return null;
    }


    /**
     * Bidirectional search for readable elements in a document tree.
     * @param {Element} fromElement
     * @param {Number} direction
     * @returns {Element} Next Element containing readable content, or null if no following readable element exists.
     */
    _seekToNextReadableElement(fromElement, direction = TtsPlayer.SEEK_FORWARD) {
        if (fromElement == null || this._isAtCeiling(fromElement)) {
            // We have run out of available elements, or are at the top of the readable subtree.
            return null;
        }

        const sibling = (direction === TtsPlayer.SEEK_BACKWARD) ?
            fromElement.previousElementSibling : fromElement.nextElementSibling;

        if (sibling === null) {
            // No siblings. Try going back up a level
            return this._seekToNextReadableElement(fromElement.parentElement, direction);
        }

        // Try looking for readable content in the adjacent sibling first
        const searchResult = this._dfs(sibling, direction);
        if (searchResult !== null) {
            // We have a match, return it.
            return searchResult;
        }

        // No match, try out-and-up again from the sibling
        return this._seekToNextReadableElement(sibling, direction);
    }

    /**
     * Performs a depth-first search on the subtree rooted at el, looking for an element
     * with readable content that cannot be broken down further.
     * @param {Element} el
     * @param {Number} direction
     * @returns {Element} The first element found in a DFS that can be read.
     */
    _dfs(el, direction) {
        if (!this._containsReadableContent(el)) {
            // No readable content anywhere in this subtree.
            return null;
        }

        // This is promising! There's content in here we can use.
        if (this._isAtomic(el)) {
            // "Atomic" elements cannot be further broken down. We're done. Return this element.
            return el;
        }

        if (el.children.length > 0) {
            // There's readable content but the element contains more children. We should
            // traverse the children in order to find the smallest possible unit of readable content.
            const backward = (direction === TtsPlayer.SEEK_BACKWARD);

            // Initialize either to first or last child depending on the seek direction
            let nextChild = el.children[backward ? el.children.length - 1 : 0];

            // Loop until either a readable result is returned, or we run out of children
            while (nextChild !== null) {
                // Recurse into this child to see if we can find a readable atomic element.
                const searchResult = this._dfs(nextChild, direction);
                if (searchResult !== null) {
                    return searchResult;
                }
                // Still haven't found the content we're looking for, move to the next child.
                nextChild = backward ? nextChild.previousElementSibling : nextChild.nextElementSibling;
            }
        }

        // If we find ourselves here, no child yielded a readable atomic element.
        return null;
    }

    /**
     * @param {Element} element
     * @returns {boolean} True if the element is non-hidden, visible, and contains textual content,
     *                    is an announceable element, or contains an image.
     */
    _containsReadableContent(element) {
        // Only operate on non-hidden, visible elements
        // TODO: Find out if we still care about the deprecated annotation structural semantic,
        //       and find some good examples for test suite.
        if (element.hidden === false && element.ownerDocument.defaultView.getComputedStyle(element).display !== 'none') {
            // Possible readable nodes
            if (
                (element.textContent.search(WORD_CHAR_MATCHER) > -1)
                    || this._isAnnounceableElement(element)
                    || (element.getElementsByTagName('img').length > 0)
            )
            {
                return true;
            }
        }
        return false;
    }

    /**
     * @param {Element} element
     * @returns {boolean} true if the element is as low as we can practically go in the DOM tree.
     *                    Generally this means the element contains non-empty text nodes, non-block
     *                    children, or has no children.
     */
    _isAtomic(element) {
        const childNodes = element.childNodes;
        if (childNodes.length === 0) {
            // If there are no children, it's atomic by definition.
            return true;
        }

        for (let i = 0; i < childNodes.length; i++) {
            const child = childNodes[i];
            if (child.nodeType === Node.TEXT_NODE && child.textContent.search(WORD_CHAR_MATCHER) > -1) {
                // Contains text content in a text node, must read.
                return true;
            }
            if (child.nodeType === Node.ELEMENT_NODE
                && !isBlockElement(child)
                && !this._isAnnounceableElement(child)
            )
            {
                // Contains phrasing content, must read.
                return true;
            }
        }

        // Only block-level or announceable children (and blank text): not atomic.
        return false;
    }

    /**
     * @param {Element} element
     * @returns {boolean} True if the element should be treated as an atomic readable element.
     */
    _isAnnounceableElement(element) {
        // Images and pagebreaks
        return (element.tagName === 'IMG') ||
            (element.getAttribute('epub:type') === 'pagebreak') ||
            (element.getAttribute('role') === 'doc-pagebreak');
    }

    /**
     * @param {Element} element 
     * @returns {boolean} Returns true if the element matches the TtsPlayer's ceiling node or if it is the body tag.
     */
    _isAtCeiling(element) {
        return (element === this.ceiling || element.tagName === 'BODY');
    }

    /**
     * Clears the active element, the sentence list and the current sentence index.
     * The sentence list is reset to an empty array (never null) because play(), ffwd()
     * and handleSentenceEnd() read its length without a null check.
     */
    _clearData() {
        console.log("Clear data was called.");
        this.activeElement = null;
        this.sentences = [];
        this.currentSentence = -1;
    }

    /**
     * Tests whether a viewport point lies inside any client rect of the given range.
     * @param {Number} clientX
     * @param {Number} clientY
     * @param {Range} range
     * @returns {boolean} True if the point falls within at least one of the range's client rects.
     */
    _isPointInRange(clientX, clientY, range) {
        const rects = range.getClientRects();

        for (const r of rects) {
            if (
                (clientX >= r.x && clientX < r.x + r.width)
                &&
                (clientY >= r.y && clientY < r.y + r.height)
            )
            {
                return true;
            }
        }
        return false;
    }

}

export default TtsPlayer;