import { EventEmitter } from "events";
import * as SpeechSDK from "microsoft-cognitiveservices-speech-sdk";
import { concatTranscripts } from "../utils";
import { getToken } from "./utils";

/**
 * Event-emitting wrapper around the Azure Speech SDK `SpeechRecognizer`.
 *
 * Configure the public fields (`lang`, `phrases`, `continuous`,
 * `enablePronunciationAssessment`, `referenceText`, `mediaStream`) and then
 * call `start()` / `stop()`.
 *
 * Events emitted:
 *  - "recognizingResult"    (result)  interim SDK result
 *  - "recognizedResult"     (result)  final SDK result, whatever its reason
 *  - "result"               ({ text, intent?, translated?, pronunciation?, finalTranscript })
 *  - "canceled"             ({ reason, errorDetails })
 *  - "isListeningChanged"   (boolean)
 *  - "isRecognizingChanged" (boolean)
 */
export default class SpeechRecognizer extends EventEmitter {
  constructor() {
    super();

    this.isListening = false;
    this.isRecognizing = false;
    this.lang = "en-US";
    this.phrases = null;
    this.continuous = false;
    this.enablePronunciationAssessment = false;
    this.referenceText = "";
    this.mediaStream = undefined;
    this.finalTranscript = "";
    this.token = undefined;
    this.region = undefined;
    this.recognizer = undefined;
  }

  /**
   * Parses a JSON string, returning `undefined` instead of throwing when the
   * input is missing, empty or malformed.
   *
   * @param {*} text - Candidate JSON text.
   * @param {string} label - Short description used in the log message.
   * @returns {*} The parsed value, or `undefined` when parsing was not possible.
   */
  static parseJsonOrUndefined(text, label) {
    if (typeof text !== "string" || text === "") {
      return undefined;
    }

    try {
      return JSON.parse(text);
    } catch (err) {
      console.log(`[AzureSpeechRecognition] Invalid ${label} JSON:`, err);
      return undefined;
    }
  }

  /**
   * Stores the credentials used by `getSpeechConfig()`.
   *
   * @param {string} token - Authorization token.
   * @param {string} region - Service region.
   */
  setAuthentication(token, region) {
    this.token = token;
    this.region = region;
  }

  /**
   * Builds the audio input configuration from `this.mediaStream`.
   *
   * `AudioConfig.fromMicrophoneInput` expects a device id string, so a
   * non-string value (e.g. a `MediaStream`) is routed to
   * `AudioConfig.fromStreamInput` instead. When nothing is set, the default
   * microphone is used.
   *
   * @returns {object} The SDK audio configuration.
   */
  getAudioConfig() {
    if (!this.mediaStream) {
      return SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
    }

    if (typeof this.mediaStream === "string") {
      return SpeechSDK.AudioConfig.fromMicrophoneInput(this.mediaStream);
    }

    return SpeechSDK.AudioConfig.fromStreamInput(this.mediaStream);
  }

  /**
   * Builds the speech configuration from the stored token/region and `lang`.
   *
   * @returns {object} The SDK speech configuration.
   */
  getSpeechConfig() {
    const speechConfig = SpeechSDK.SpeechConfig.fromAuthorizationToken(
      this.token,
      this.region
    );
    speechConfig.speechRecognitionLanguage = this.lang;
    return speechConfig;
  }

  /**
   * Builds a pronunciation assessment configuration for `referenceText`
   * (hundred-mark grading, phoneme granularity, miscue disabled) with prosody
   * assessment enabled.
   *
   * @returns {object} The SDK pronunciation assessment configuration.
   */
  getPronunciationAssessmentConfig() {
    const pronunciationAssessmentConfig =
      new SpeechSDK.PronunciationAssessmentConfig(
        this.referenceText,
        SpeechSDK.PronunciationAssessmentGradingSystem.HundredMark,
        SpeechSDK.PronunciationAssessmentGranularity.Phoneme,
        false
      );

    pronunciationAssessmentConfig.enableProsodyAssessment = true;
    return pronunciationAssessmentConfig;
  }

  /**
   * Builds a pronunciation assessment configuration from a JSON description
   * (word granularity, miscue and prosody enabled) and then applies
   * `referenceText`.
   *
   * NOTE(review): "ScenarioId" still carries a placeholder value — confirm
   * the real id before relying on this method.
   *
   * @returns {object} The SDK pronunciation assessment configuration.
   */
  getPronunciationAssessmentConfigFromJson() {
    const settings = {
      GradingSystem: "HundredMark",
      Granularity: "Word",
      EnableMiscue: "True",
      EnableProsodyAssessment: "True",
      ScenarioId: "[scenario ID will be assigned by product team]",
    };
    const pronunciationAssessmentConfig =
      SpeechSDK.PronunciationAssessmentConfig.fromJSON(
        JSON.stringify(settings)
      );
    pronunciationAssessmentConfig.referenceText = this.referenceText;

    return pronunciationAssessmentConfig;
  }

  /**
   * SDK `recognizing` handler: flags recognition as in progress and forwards
   * the interim result as "recognizingResult".
   */
  onRecognizing(sender, recognitionEventArgs) {
    const result = recognitionEventArgs.result;

    console.log(`[USER-SPEAKING][${new Date().toISOString()}]`, result.text);

    this.setIsRecognizing(true);

    this.emit("recognizingResult", result);
  }

  /**
   * SDK `recognized` handler (wired up in continuous mode only).
   */
  onRecognized(sender, recognitionEventArgs) {
    this.onRecognizedResult(recognitionEventArgs.result);
  }

  /**
   * Handles a final recognition result.
   *
   * Always emits "recognizedResult" with the raw SDK result. For recognized
   * speech / translation / intent results it additionally appends the text
   * to `finalTranscript` and emits "result" with the normalized payload.
   * Missing or malformed JSON attachments are logged and omitted from the
   * payload rather than aborting the event.
   *
   * @param {object} result - SDK recognition result.
   */
  onRecognizedResult(result) {
    console.log(`[USER-SPOKEN][${new Date().toISOString()}]`, result.text);

    this.setIsRecognizing(false);
    this.emit("recognizedResult", result);

    switch (result.reason) {
      case SpeechSDK.ResultReason.RecognizedSpeech:
      case SpeechSDK.ResultReason.TranslatedSpeech:
      case SpeechSDK.ResultReason.RecognizedIntent: {
        const payload = {
          text: result.text,
        };

        // Intent
        const intentJson = result.properties.getProperty(
          SpeechSDK.PropertyId.LanguageUnderstandingServiceResponse_JsonResult
        );
        if (intentJson) {
          payload.intent = intentJson;
        }

        // Translation
        if (result.translations) {
          const translationJson = SpeechRecognizer.parseJsonOrUndefined(
            result.json,
            "translation"
          );
          if (translationJson !== undefined) {
            payload.translated = translationJson?.translations;
          }
        }

        // Pronunciation
        if (this.enablePronunciationAssessment) {
          const pronunciation = SpeechRecognizer.parseJsonOrUndefined(
            result.properties.getProperty(
              SpeechSDK.PropertyId.SpeechServiceResponse_JsonResult
            ),
            "pronunciation"
          );
          if (pronunciation !== undefined) {
            payload.pronunciation = pronunciation;
          }
        }

        this.finalTranscript = concatTranscripts(
          this.finalTranscript,
          result.text
        );
        payload.finalTranscript = this.finalTranscript;

        this.emit("result", payload);
        break;
      }
      default:
        // NoMatch, Canceled and any other reason produce no "result" event.
        break;
    }
  }

  /**
   * SDK `canceled` handler: clears the recognizing flag (no `recognized`
   * callback is wired to do so outside continuous mode) and reports the
   * cancellation as a "canceled" event.
   */
  onCanceled(sender, cancellationEventArgs) {
    console.log("[AzureSpeechRecognition] Canceled:", cancellationEventArgs);

    const reason = SpeechSDK.CancellationReason[cancellationEventArgs.reason];
    const errorDetails = cancellationEventArgs.errorDetails;

    this.setIsRecognizing(false);
    this.emit("canceled", { reason, errorDetails });
  }

  /** SDK `sessionStarted` handler. */
  onSessionStarted(sender, sessionEventArgs) {
    console.log("[AzureSpeechRecognition] Session started");

    this.setIsListening(true);
  }

  /** SDK `sessionStopped` handler. */
  onSessionStopped(sender, sessionEventArgs) {
    console.log("[AzureSpeechRecognition] Session stopped");

    this.setIsListening(false);
  }

  /**
   * Fetches a token, creates a recognizer and starts recognition.
   *
   * In continuous mode the promise resolves once recognition has started.
   * Otherwise it resolves with the single recognition result, which is also
   * passed to `onRecognizedResult`; the recognizer is then closed because it
   * is not reused. The recognizer is also closed when starting fails.
   *
   * @returns {Promise<*>} See above.
   */
  async start() {
    console.log("[AzureSpeechRecognition] Session starting");

    const { token, region } = await getToken();
    this.setAuthentication(token, region);

    const audioConfig = this.getAudioConfig();
    const speechConfig = this.getSpeechConfig();

    // Keep a local reference so the callbacks below always act on the
    // recognizer created by this call, even if `this.recognizer` changes.
    const recognizer = new SpeechSDK.SpeechRecognizer(
      speechConfig,
      audioConfig
    );
    this.recognizer = recognizer;

    recognizer.recognizing = this.onRecognizing.bind(this);
    // Single-shot results are delivered through the recognizeOnceAsync
    // callback instead, so the handler is only attached in continuous mode.
    recognizer.recognized = this.continuous
      ? this.onRecognized.bind(this)
      : undefined;
    recognizer.canceled = this.onCanceled.bind(this);
    recognizer.sessionStarted = this.onSessionStarted.bind(this);
    recognizer.sessionStopped = this.onSessionStopped.bind(this);

    if (this.phrases) {
      const phraseListGrammar =
        SpeechSDK.PhraseListGrammar.fromRecognizer(recognizer);
      phraseListGrammar.addPhrases(this.phrases);
    }

    if (this.enablePronunciationAssessment) {
      this.getPronunciationAssessmentConfig().applyTo(recognizer);
      // Content assessment via applyConnection() is intentionally not wired up.
    }

    return new Promise((resolve, reject) => {
      if (this.continuous) {
        recognizer.startContinuousRecognitionAsync(resolve, (err) => {
          this.disposeRecognizer(recognizer);
          reject(err);
        });
        return;
      }

      recognizer.recognizeOnceAsync(
        (successfulResult) => {
          resolve(successfulResult);
          try {
            this.onRecognizedResult(successfulResult);
          } finally {
            this.disposeRecognizer(recognizer);
          }
        },
        (err) => {
          console.log(err);
          this.disposeRecognizer(recognizer);
          reject(err);
        }
      );
    });
  }

  /**
   * Sets the "speech.context" message properties that request pronunciation
   * and content assessment for `topic` on the recognizer's connection.
   *
   * The payloads are built as objects so that any characters in `topic`
   * (quotes, backslashes, ...) are passed through unchanged.
   *
   * @param {object} reco - SDK recognizer.
   * @param {string} topic - Content assessment topic.
   */
  applyConnection(reco, topic) {
    const connection = SpeechSDK.Connection.fromRecognizer(reco);

    const phraseDetectionConfig = {
      enrichment: {
        pronunciationAssessment: {
          referenceText: "",
          gradingSystem: "HundredMark",
          granularity: "Phoneme",
          dimension: "Comprehensive",
          EnableMiscue: "False",
        },
        contentAssessment: {
          topic: `${topic}`,
        },
      },
    };
    connection.setMessageProperty(
      "speech.context",
      "phraseDetection",
      phraseDetectionConfig
    );

    const phraseOutputConfig = {
      format: "Detailed",
      detailed: {
        options: [
          "WordTimings",
          "PronunciationAssessment",
          "ContentAssessment",
          "SNR",
        ],
      },
    };
    connection.setMessageProperty(
      "speech.context",
      "phraseOutput",
      phraseOutputConfig
    );
    connection.close();
  }

  /**
   * Closes `recognizer` and, when it is still the active one, clears
   * `this.recognizer` and the listening flag. Safe to call more than once
   * for the same recognizer.
   *
   * @param {object} [recognizer] - Recognizer to release.
   */
  disposeRecognizer(recognizer) {
    if (!recognizer) {
      return;
    }

    try {
      recognizer.close();
    } catch (err) {
      // Closing an already-closed recognizer is not fatal for callers.
      console.log("[AzureSpeechRecognition] close failed:", err);
    }

    if (this.recognizer === recognizer) {
      this.recognizer = undefined;
      this.setIsListening(false);
    }
  }

  /**
   * Stops recognition and releases the active recognizer.
   *
   * Does nothing when there is neither an active recognizer nor a listening
   * session. In continuous mode the recognizer is closed once the SDK has
   * acknowledged (or failed) the stop request.
   */
  stop() {
    console.log("[AzureSpeechRecognition] stop");

    const recognizer = this.recognizer;

    if (!this.isListening && !recognizer) {
      return;
    }

    if (!recognizer) {
      this.setIsListening(false);
      return;
    }

    if (this.continuous) {
      recognizer.stopContinuousRecognitionAsync(
        () => this.disposeRecognizer(recognizer),
        (err) => {
          console.log("[AzureSpeechRecognition] stop failed:", err);
          this.disposeRecognizer(recognizer);
        }
      );
    } else {
      this.disposeRecognizer(recognizer);
    }
  }

  /**
   * Updates `isListening` and emits "isListeningChanged" when it changes.
   *
   * @param {boolean} listening - New listening state.
   */
  setIsListening(listening) {
    console.log("[AzureSpeechRecognition] setIsListening", listening);

    if (listening === this.isListening) {
      return;
    }
    this.isListening = listening;
    this.emit("isListeningChanged", listening);
  }

  /**
   * Updates `isRecognizing` and emits "isRecognizingChanged" when it changes.
   *
   * @param {boolean} recognizing - New recognizing state.
   */
  setIsRecognizing(recognizing) {
    if (recognizing === this.isRecognizing) {
      return;
    }
    this.isRecognizing = recognizing;
    this.emit("isRecognizingChanged", recognizing);
  }
}
