// components/speech/speech_recognizer_fsm.h - chromium/src.git - Git at Google

 // Copyright 2024 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef COMPONENTS_SPEECH_SPEECH_RECOGNIZER_FSM_H_
 #define COMPONENTS_SPEECH_SPEECH_RECOGNIZER_FSM_H_

 #include <vector>

 #include "base/memory/ref_counted.h"
 #include "components/speech/audio_buffer.h"
 #include "media/mojo/mojom/audio_data.mojom.h"
 #include "media/mojo/mojom/speech_recognition_error.mojom.h"
 #include "media/mojo/mojom/speech_recognition_recognition_context.h"
 #include "media/mojo/mojom/speech_recognition_result.mojom.h"

 namespace speech {

 // Interface for the speech recognizer finite-state machine used to power the
 // Web Speech API.
 // TODO(crbug.com/40286514): Remove the scoped_refptr usage.
 class SpeechRecognizerFsm {
  public:
   // The Finite State Machine states of the recognizer in sequential order.
   // Enumerators are numbered consecutively starting at STATE_IDLE = 0.
   // STATE_MAX_VALUE is an alias for the last state (STATE_ENDED), not an
   // additional state.
   enum FSMState {
     STATE_IDLE = 0,
     STATE_PREPARING,
     STATE_STARTING,
     STATE_ESTIMATING_ENVIRONMENT,
     STATE_WAITING_FOR_SPEECH,
     STATE_RECOGNIZING,
     STATE_WAITING_FINAL_RESULT,
     STATE_ENDED,
     STATE_MAX_VALUE = STATE_ENDED
   };

   // The Finite State Machine events used by the recognizer. Event dispatching
   // must be sequential, otherwise it will break the rules and the assumptions
   // of the finite state automata model.
   // Enumerators are numbered consecutively starting at EVENT_ABORT = 0.
   // EVENT_MAX_VALUE is an alias for the last event (EVENT_AUDIO_ERROR), not an
   // additional event.
   enum FSMEvent {
     EVENT_ABORT = 0,
     EVENT_PREPARE,
     EVENT_START,
     EVENT_STOP_CAPTURE,
     EVENT_AUDIO_DATA,
     EVENT_ENGINE_RESULT,
     EVENT_UPDATE_RECOGNITION_CONTEXT,
     EVENT_ENGINE_ERROR,
     EVENT_AUDIO_ERROR,
     EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
   };

   // Argument bundle passed by const reference to DispatchEvent(),
   // ExecuteTransitionAndGetNextState() and most of the transition handlers.
   // The constructors and the destructor are declared but not defined in this
   // header.
   struct FSMEventArgs {
     // Explicit, so a bare FSMEvent does not implicitly convert to
     // FSMEventArgs.
     explicit FSMEventArgs(FSMEvent event_value);
     // User-declared copy constructor.
     // NOTE(review): how the pointer-like members (audio_data, audio_chunk,
     // engine_results) are copied is decided by the out-of-line definition --
     // confirm there.
     FSMEventArgs(const FSMEventArgs& other);
     ~FSMEventArgs();

     // The event carried by this bundle.
     // NOTE(review): presumably initialized from |event_value| by the
     // constructor -- confirm in the definition.
     FSMEvent event;
     // Payload members.
     // NOTE(review): which event populates which member is not visible in this
     // header. The names suggest audio_data / audio_chunk accompany
     // EVENT_AUDIO_DATA, engine_results accompanies EVENT_ENGINE_RESULT,
     // recognition_context accompanies EVENT_UPDATE_RECOGNITION_CONTEXT and
     // engine_error accompanies the error events -- confirm against callers.
     media::mojom::AudioDataS16Ptr audio_data;
     scoped_refptr<AudioChunk> audio_chunk;
     std::vector<media::mojom::WebSpeechRecognitionResultPtr> engine_results;
     media::SpeechRecognitionRecognitionContext recognition_context;
     media::mojom::SpeechRecognitionError engine_error;
   };

   // Defines the behavior of the recognizer FSM, selecting the appropriate
   // transition according to the current state and event.
   // This is the only non-virtual member function of the interface; it is
   // declared here and defined outside this header.
   FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);

   // Entry point for pushing any new external event into the recognizer FSM.
   // Pure virtual: each implementer supplies its own dispatching.
   virtual void DispatchEvent(const FSMEventArgs& event_args) = 0;

   // The methods below handle transitions of the recognizer FSM.
   // All of them are pure virtual. Every handler except ProcessAudioPipeline()
   // returns an FSMState.
   // NOTE(review): the returned value is presumably the state the FSM moves to
   // next -- confirm in ExecuteTransitionAndGetNextState().
   // Abort() is the one handler that takes the error directly instead of an
   // FSMEventArgs, and DoNothing() is the only const-qualified handler.
   virtual void ProcessAudioPipeline(const FSMEventArgs& event_args) = 0;
   virtual FSMState PrepareRecognition(const FSMEventArgs&) = 0;
   virtual FSMState StartRecording(const FSMEventArgs& event_args) = 0;
   virtual FSMState StartRecognitionEngine(const FSMEventArgs& event_args) = 0;
   virtual FSMState WaitEnvironmentEstimationCompletion(
       const FSMEventArgs& event_args) = 0;
   virtual FSMState DetectUserSpeechOrTimeout(
       const FSMEventArgs& event_args) = 0;
   virtual FSMState StopCaptureAndWaitForResult(
       const FSMEventArgs& event_args) = 0;
   virtual FSMState ProcessIntermediateResult(
       const FSMEventArgs& event_args) = 0;
   virtual FSMState ProcessFinalResult(const FSMEventArgs& event_args) = 0;
   virtual FSMState AbortSilently(const FSMEventArgs& event_args) = 0;
   virtual FSMState AbortWithError(const FSMEventArgs& event_args) = 0;
   virtual FSMState Abort(const media::mojom::SpeechRecognitionError& error) = 0;
   virtual FSMState DetectEndOfSpeech(const FSMEventArgs& event_args) = 0;
   virtual FSMState UpdateRecognitionContext(const FSMEventArgs& event_args) = 0;
   virtual FSMState DoNothing(const FSMEventArgs& event_args) const = 0;
   virtual FSMState NotFeasible(const FSMEventArgs& event_args) = 0;

  protected:
   // Grants base::RefCountedThreadSafe<SpeechRecognizerFsm> access to the
   // non-public members, including the protected destructor below.
   // NOTE(review): this class declares no RefCountedThreadSafe base itself, so
   // it is unclear from this header who relies on the friendship -- confirm
   // before removing it.
   friend class base::RefCountedThreadSafe<SpeechRecognizerFsm>;
   // Protected, so code outside the class hierarchy (other than the friend
   // above) cannot destroy an object through a SpeechRecognizerFsm pointer.
   virtual ~SpeechRecognizerFsm() = default;
   // Initialized to STATE_IDLE.
   // NOTE(review): presumably holds the current FSM state -- confirm in
   // implementers.
   FSMState state_ = STATE_IDLE;
   // Initialized to false.
   // NOTE(review): presumably set while an event is being dispatched, to
   // enforce the sequential-dispatch rule documented on FSMEvent -- confirm in
   // implementers.
   bool is_dispatching_event_ = false;
 };

 }  // namespace speech

 #endif  // COMPONENTS_SPEECH_SPEECH_RECOGNIZER_FSM_H_
	// Copyright 2024 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef COMPONENTS_SPEECH_SPEECH_RECOGNIZER_FSM_H_
	#define COMPONENTS_SPEECH_SPEECH_RECOGNIZER_FSM_H_

	#include <vector>

	#include "base/memory/ref_counted.h"
	#include "components/speech/audio_buffer.h"
	#include "media/mojo/mojom/audio_data.mojom.h"
	#include "media/mojo/mojom/speech_recognition_error.mojom.h"
	#include "media/mojo/mojom/speech_recognition_recognition_context.h"
	#include "media/mojo/mojom/speech_recognition_result.mojom.h"

	namespace speech {

	// Interface for the speech recognizer finite-state machine used to power the
	// Web Speech API.
	// TODO(crbug.com/40286514): Remove the scoped_refptr usage.
	class SpeechRecognizerFsm {
	// NOTE(review): this is the second copy of this class in the file. It sits
	// under the same COMPONENTS_SPEECH_SPEECH_RECOGNIZER_FSM_H_ include guard as
	// the first copy, so the preprocessor skips it once the first copy has
	// defined the macro.
	public:
	// The Finite State Machine states of the recognizer in sequential order.
	// Enumerators are numbered consecutively starting at STATE_IDLE = 0.
	// STATE_MAX_VALUE is an alias for the last state (STATE_ENDED), not an
	// additional state.
	enum FSMState {
	STATE_IDLE = 0,
	STATE_PREPARING,
	STATE_STARTING,
	STATE_ESTIMATING_ENVIRONMENT,
	STATE_WAITING_FOR_SPEECH,
	STATE_RECOGNIZING,
	STATE_WAITING_FINAL_RESULT,
	STATE_ENDED,
	STATE_MAX_VALUE = STATE_ENDED
	};

	// The Finite State Machine events used by the recognizer. Event dispatching
	// must be sequential, otherwise it will break the rules and the assumptions
	// of the finite state automata model.
	// Enumerators are numbered consecutively starting at EVENT_ABORT = 0.
	// EVENT_MAX_VALUE is an alias for the last event (EVENT_AUDIO_ERROR), not an
	// additional event.
	enum FSMEvent {
	EVENT_ABORT = 0,
	EVENT_PREPARE,
	EVENT_START,
	EVENT_STOP_CAPTURE,
	EVENT_AUDIO_DATA,
	EVENT_ENGINE_RESULT,
	EVENT_UPDATE_RECOGNITION_CONTEXT,
	EVENT_ENGINE_ERROR,
	EVENT_AUDIO_ERROR,
	EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
	};

	// Argument bundle passed by const reference to DispatchEvent(),
	// ExecuteTransitionAndGetNextState() and most of the transition handlers.
	// The constructors and the destructor are declared but not defined in this
	// header.
	struct FSMEventArgs {
	// Explicit, so a bare FSMEvent does not implicitly convert to FSMEventArgs.
	explicit FSMEventArgs(FSMEvent event_value);
	// User-declared copy constructor.
	// NOTE(review): how the pointer-like members (audio_data, audio_chunk,
	// engine_results) are copied is decided by the out-of-line definition --
	// confirm there.
	FSMEventArgs(const FSMEventArgs& other);
	~FSMEventArgs();

	// The event carried by this bundle.
	// NOTE(review): presumably initialized from |event_value| by the
	// constructor -- confirm in the definition.
	FSMEvent event;
	// Payload members.
	// NOTE(review): which event populates which member is not visible in this
	// header. The names suggest audio_data / audio_chunk accompany
	// EVENT_AUDIO_DATA, engine_results accompanies EVENT_ENGINE_RESULT,
	// recognition_context accompanies EVENT_UPDATE_RECOGNITION_CONTEXT and
	// engine_error accompanies the error events -- confirm against callers.
	media::mojom::AudioDataS16Ptr audio_data;
	scoped_refptr<AudioChunk> audio_chunk;
	std::vector<media::mojom::WebSpeechRecognitionResultPtr> engine_results;
	media::SpeechRecognitionRecognitionContext recognition_context;
	media::mojom::SpeechRecognitionError engine_error;
	};

	// Defines the behavior of the recognizer FSM, selecting the appropriate
	// transition according to the current state and event.
	// This is the only non-virtual member function of the interface; it is
	// declared here and defined outside this header.
	FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);

	// Entry point for pushing any new external event into the recognizer FSM.
	// Pure virtual: each implementer supplies its own dispatching.
	virtual void DispatchEvent(const FSMEventArgs& event_args) = 0;

	// The methods below handle transitions of the recognizer FSM.
	// All of them are pure virtual. Every handler except ProcessAudioPipeline()
	// returns an FSMState.
	// NOTE(review): the returned value is presumably the state the FSM moves to
	// next -- confirm in ExecuteTransitionAndGetNextState().
	// Abort() is the one handler that takes the error directly instead of an
	// FSMEventArgs, and DoNothing() is the only const-qualified handler.
	virtual void ProcessAudioPipeline(const FSMEventArgs& event_args) = 0;
	virtual FSMState PrepareRecognition(const FSMEventArgs&) = 0;
	virtual FSMState StartRecording(const FSMEventArgs& event_args) = 0;
	virtual FSMState StartRecognitionEngine(const FSMEventArgs& event_args) = 0;
	virtual FSMState WaitEnvironmentEstimationCompletion(
	const FSMEventArgs& event_args) = 0;
	virtual FSMState DetectUserSpeechOrTimeout(
	const FSMEventArgs& event_args) = 0;
	virtual FSMState StopCaptureAndWaitForResult(
	const FSMEventArgs& event_args) = 0;
	virtual FSMState ProcessIntermediateResult(
	const FSMEventArgs& event_args) = 0;
	virtual FSMState ProcessFinalResult(const FSMEventArgs& event_args) = 0;
	virtual FSMState AbortSilently(const FSMEventArgs& event_args) = 0;
	virtual FSMState AbortWithError(const FSMEventArgs& event_args) = 0;
	virtual FSMState Abort(const media::mojom::SpeechRecognitionError& error) = 0;
	virtual FSMState DetectEndOfSpeech(const FSMEventArgs& event_args) = 0;
	virtual FSMState UpdateRecognitionContext(const FSMEventArgs& event_args) = 0;
	virtual FSMState DoNothing(const FSMEventArgs& event_args) const = 0;
	virtual FSMState NotFeasible(const FSMEventArgs& event_args) = 0;

	protected:
	// Grants base::RefCountedThreadSafe<SpeechRecognizerFsm> access to the
	// non-public members, including the protected destructor below.
	// NOTE(review): this class declares no RefCountedThreadSafe base itself, so
	// it is unclear from this header who relies on the friendship -- confirm
	// before removing it.
	friend class base::RefCountedThreadSafe<SpeechRecognizerFsm>;
	// Protected, so code outside the class hierarchy (other than the friend
	// above) cannot destroy an object through a SpeechRecognizerFsm pointer.
	virtual ~SpeechRecognizerFsm() = default;
	// Initialized to STATE_IDLE.
	// NOTE(review): presumably holds the current FSM state -- confirm in
	// implementers.
	FSMState state_ = STATE_IDLE;
	// Initialized to false.
	// NOTE(review): presumably set while an event is being dispatched, to
	// enforce the sequential-dispatch rule documented on FSMEvent -- confirm in
	// implementers.
	bool is_dispatching_event_ = false;
	};

	} // namespace speech

	#endif // COMPONENTS_SPEECH_SPEECH_RECOGNIZER_FSM_H_