blob: f83e245358da8a5f37a315182b83da29b81dad20 [file] [log] [blame]
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/translate/content/renderer/translate_agent.h"
#include <stddef.h>
#include <string>
#include <utility>
#include "base/check_op.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/functional/bind.h"
#include "base/json/string_escape.h"
#include "base/location.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/metrics/histogram_macros_local.h"
#include "base/no_destructor.h"
#include "base/notreached.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/task/single_thread_task_runner.h"
#include "base/trace_event/trace_event.h"
#include "components/language_detection/content/renderer/language_detection_agent.h"
#include "components/language_detection/core/constants.h"
#include "components/language_detection/core/language_detection_provider.h"
#include "components/translate/content/renderer/isolated_world_util.h"
#include "components/translate/core/common/translate_metrics.h"
#include "components/translate/core/common/translate_util.h"
#include "components/translate/core/language_detection/language_detection_model.h"
#include "components/translate/core/language_detection/language_detection_util.h"
#include "content/public/common/content_constants.h"
#include "content/public/common/content_switches.h"
#include "content/public/common/url_constants.h"
#include "content/public/renderer/render_frame.h"
#include "content/public/renderer/render_thread.h"
#include "third_party/blink/public/platform/browser_interface_broker_proxy.h"
#include "third_party/blink/public/platform/scheduler/web_agent_group_scheduler.h"
#include "third_party/blink/public/web/web_document.h"
#include "third_party/blink/public/web/web_language_detection_details.h"
#include "third_party/blink/public/web/web_local_frame.h"
#include "third_party/blink/public/web/web_script_source.h"
#include "url/gurl.h"
#include "url/url_constants.h"
#include "v8/include/v8.h"
using blink::WebDocument;
using blink::WebLanguageDetectionDetails;
using blink::WebLocalFrame;
using blink::WebScriptSource;
using blink::WebString;
namespace {
// The delay in milliseconds that we'll wait before checking to see if the
// translate library injected in the page is ready.
const int kTranslateInitCheckDelayMs = 150;
// The maximum number of times we'll check to see if the translate library
// injected in the page is ready.
const int kMaxTranslateInitCheckAttempts = 5;
// The delay we wait in milliseconds before checking whether the translation has
// finished.
const int kTranslateStatusCheckDelayMs = 400;
// Language name passed to the Translate element for it to detect the language.
const char kAutoDetectionLanguage[] = "auto";
// The current CLD model version.
constexpr char kCLDModelVersion[] = "CLD3";
// Returns the language detection model that is shared across the RenderFrames
// in the renderer.
translate::LanguageDetectionModel& GetLanguageDetectionModel() {
static base::NoDestructor<translate::LanguageDetectionModel> instance(
language_detection::GetLanguageDetectionModel());
return *instance;
}
// Returns if the language detection should be overridden so that a default
// result is returned immediately.
bool ShouldOverrideLanguageDetectionForTesting() {
base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
if (command_line->HasSwitch(::switches::kOverrideLanguageDetection)) {
return true;
}
return false;
}
} // namespace
namespace translate {
////////////////////////////////////////////////////////////////////////////////
// TranslateAgent, public:
TranslateAgent::TranslateAgent(content::RenderFrame* render_frame, int world_id)
: content::RenderFrameObserver(render_frame),
world_id_(world_id),
translate_language_detection_model_(GetLanguageDetectionModel()),
language_detection_agent_(
IsTFLiteLanguageDetectionEnabled()
? new language_detection::LanguageDetectionAgent(
render_frame,
translate_language_detection_model_->tflite_model())
: nullptr) {
translate_task_runner_ = this->render_frame()->GetTaskRunner(
blink::TaskType::kInternalTranslation);
}
TranslateAgent::~TranslateAgent() = default;
void TranslateAgent::SeedLanguageDetectionModelForTesting(
base::File model_file) {
translate_language_detection_model_->tflite_model().UpdateWithFile(
std::move(model_file));
}
void TranslateAgent::PrepareForUrl(const GURL& url) {
// Navigated to a new url, reset current page translation.
ResetPage();
}
void TranslateAgent::PageCaptured(
scoped_refptr<const base::RefCountedString16> contents) {
TRACE_EVENT("browser", "TranslateAgent::PageCaptured");
// Get the document language as set by WebKit from the http-equiv
// meta tag for "content-language". This may or may not also
// have a value derived from the actual Content-Language HTTP
// header. The two actually have different meanings (despite the
// original intent of http-equiv to be an equivalent) with the former
// being the language of the document and the latter being the
// language of the intended audience (a distinction really only
// relevant for things like language textbooks). This distinction
// shouldn't affect translation.
if (!contents) {
return;
}
WebLocalFrame* main_frame = render_frame()->GetWebFrame();
if (!main_frame)
return;
WebDocument document = main_frame->GetDocument();
GURL url = GURL(document.Url());
// Limit detection to URLs that only detect the language of the content if the
// page is potentially a candidate for translation. This should be strictly a
// subset of the conditions in TranslateService::IsTranslatableURL, however,
// due to layering they cannot be identical. Critically, this list should
// never filter anything that is eligible for translation. Under filtering is
// ok as the translate service will make the final call and only results in a
// slight overhead in running the model when unnecessary.
if (url.is_empty() || url.SchemeIs(content::kChromeUIScheme) ||
url.SchemeIs(content::kChromeDevToolsScheme) || url.IsAboutBlank()) {
return;
}
page_contents_length_ = contents->as_string().size();
WebLanguageDetectionDetails web_detection_details =
WebLanguageDetectionDetails::CollectLanguageDetectionDetails(document);
WebLanguageDetectionDetails::RecordAcceptLanguageAndXmlHtmlLangMetric(
document);
std::string content_language = web_detection_details.content_language.Utf8();
std::string html_lang = web_detection_details.html_language.Utf8();
std::string model_detected_language;
bool is_model_reliable = false;
std::string detection_model_version;
float model_reliability_score = 0.0;
if (ShouldOverrideLanguageDetectionForTesting()) {
std::string language = "fr";
LanguageDetectionDetails details;
details.adopted_language = language;
details.contents = contents->as_string();
details.has_run_lang_detection = true;
ResetPage();
last_details_ = std::move(details);
RenewPageRegistration();
return;
}
LanguageDetectionDetails details;
std::string language;
if (page_contents_length_ == 0) {
// If captured content is empty do not run language detection and
// only use page-provided languages.
language = translate::DeterminePageLanguageNoModel(
content_language, html_lang,
translate::LanguageVerificationType::kNoPageContent);
} else if (translate::IsTFLiteLanguageDetectionEnabled()) {
// Use TFLite and page contents to assist with language detection.
bool is_available = translate_language_detection_model_->IsAvailable();
language =
is_available
? translate_language_detection_model_->DeterminePageLanguage(
content_language, html_lang, contents->as_string(),
&model_detected_language, &is_model_reliable,
model_reliability_score)
// If the model is not available do not run language
// detection and only use page-provided languages.
: translate::DeterminePageLanguageNoModel(
content_language, html_lang,
translate::LanguageVerificationType::kModelNotAvailable);
UMA_HISTOGRAM_BOOLEAN(
"LanguageDetection.TFLiteModel.WasModelAvailableForDetection",
is_available);
UMA_HISTOGRAM_BOOLEAN(
"LanguageDetection.TFLiteModel.WasModelUnavailableDueToDeferredLoad",
!is_available &&
language_detection_agent_->waiting_for_first_foreground());
detection_model_version =
translate_language_detection_model_->GetModelVersion();
details.has_run_lang_detection = true;
} else {
// Use CLD3 and page contents to assist with language detection.
language = DeterminePageLanguage(
content_language, html_lang, contents->as_string(),
&model_detected_language, &is_model_reliable, model_reliability_score);
detection_model_version = kCLDModelVersion;
details.has_run_lang_detection = true;
}
if (language.empty())
return;
details.time = base::Time::Now();
details.url = web_detection_details.url;
details.content_language = content_language;
details.model_detected_language = model_detected_language;
details.is_model_reliable = is_model_reliable;
details.has_notranslate = web_detection_details.has_no_translate_meta;
details.html_root_language = html_lang;
details.adopted_language = language;
details.model_reliability_score = model_reliability_score;
details.detection_model_version = detection_model_version;
// TODO(hajimehoshi): If this affects performance, it should be set only if
// translate-internals tab exists.
details.contents = contents->as_string();
// For the same render frame with the same url, each time when its texts are
// captured, it should be treated as a new page to do translation.
ResetPage();
last_details_ = std::move(details);
RenewPageRegistration();
}
void TranslateAgent::RenewPageRegistration() {
if (!last_details_.has_value()) {
return;
}
WebLocalFrame* main_frame = render_frame()->GetWebFrame();
if (!main_frame) {
return;
}
LanguageDetectionDetails details = std::move(*last_details_);
ResetPage();
GetTranslateHandler()->RegisterPage(
receiver_.BindNewPipeAndPassRemote(
main_frame->GetTaskRunner(blink::TaskType::kInternalTranslation)),
details, !details.has_notranslate && !details.adopted_language.empty());
last_details_ = std::move(details);
}
void TranslateAgent::CancelPendingTranslation() {
weak_method_factory_.InvalidateWeakPtrs();
// Make sure to send the cancelled response back.
if (translate_callback_pending_) {
std::move(translate_callback_pending_)
.Run(true, source_lang_, target_lang_, TranslateErrors::NONE);
}
source_lang_.clear();
target_lang_.clear();
}
////////////////////////////////////////////////////////////////////////////////
// TranslateAgent, protected:
bool TranslateAgent::IsTranslateLibAvailable() {
return ExecuteScriptAndGetBoolResult(
"typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && "
"typeof cr.googleTranslate.translate == 'function'",
false);
}
bool TranslateAgent::IsTranslateLibReady() {
return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false);
}
bool TranslateAgent::HasTranslationFinished() {
return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true);
}
bool TranslateAgent::HasTranslationFailed() {
return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true);
}
int64_t TranslateAgent::GetErrorCode() {
int64_t error_code =
ExecuteScriptAndGetIntegerResult("cr.googleTranslate.errorCode");
DCHECK_LT(error_code, static_cast<int>(TranslateErrors::TRANSLATE_ERROR_MAX));
return error_code;
}
bool TranslateAgent::StartTranslation() {
return ExecuteScriptAndGetBoolResult(
BuildTranslationScript(source_lang_, target_lang_), false);
}
std::string TranslateAgent::GetPageSourceLanguage() {
return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang");
}
base::TimeDelta TranslateAgent::AdjustDelay(int delay_in_milliseconds) {
// Just converts |delay_in_milliseconds| without any modification in practical
// cases. Tests will override this function to return modified value.
return base::Milliseconds(delay_in_milliseconds);
}
void TranslateAgent::ExecuteScript(const std::string& script) {
WebLocalFrame* main_frame = render_frame()->GetWebFrame();
if (!main_frame)
return;
WebScriptSource source = WebScriptSource(WebString::FromASCII(script));
main_frame->ExecuteScriptInIsolatedWorld(
world_id_, source, blink::BackForwardCacheAware::kAllow);
}
bool TranslateAgent::ExecuteScriptAndGetBoolResult(const std::string& script,
bool fallback) {
WebLocalFrame* main_frame = render_frame()->GetWebFrame();
if (!main_frame)
return fallback;
v8::HandleScope handle_scope(main_frame->GetAgentGroupScheduler()->Isolate());
WebScriptSource source = WebScriptSource(WebString::FromASCII(script));
v8::Local<v8::Value> result =
main_frame->ExecuteScriptInIsolatedWorldAndReturnValue(
world_id_, source, blink::BackForwardCacheAware::kAllow);
if (result.IsEmpty() || !result->IsBoolean()) {
return fallback;
}
return result.As<v8::Boolean>()->Value();
}
std::string TranslateAgent::ExecuteScriptAndGetStringResult(
const std::string& script) {
WebLocalFrame* main_frame = render_frame()->GetWebFrame();
if (!main_frame)
return std::string();
v8::Isolate* isolate = main_frame->GetAgentGroupScheduler()->Isolate();
v8::HandleScope handle_scope(isolate);
WebScriptSource source = WebScriptSource(WebString::FromASCII(script));
v8::Local<v8::Value> result =
main_frame->ExecuteScriptInIsolatedWorldAndReturnValue(
world_id_, source, blink::BackForwardCacheAware::kAllow);
if (result.IsEmpty() || !result->IsString()) {
return std::string();
}
v8::Local<v8::String> v8_str = result.As<v8::String>();
size_t length = v8_str->Utf8LengthV2(isolate);
if (length == 0) {
return std::string();
}
std::string str(length, '\0');
v8_str->WriteUtf8V2(isolate, str.data(), length);
return str;
}
double TranslateAgent::ExecuteScriptAndGetDoubleResult(
const std::string& script) {
WebLocalFrame* main_frame = render_frame()->GetWebFrame();
if (!main_frame)
return 0.0;
v8::HandleScope handle_scope(main_frame->GetAgentGroupScheduler()->Isolate());
WebScriptSource source = WebScriptSource(WebString::FromASCII(script));
v8::Local<v8::Value> result =
main_frame->ExecuteScriptInIsolatedWorldAndReturnValue(
world_id_, source, blink::BackForwardCacheAware::kAllow);
if (result.IsEmpty() || !result->IsNumber()) {
return 0.0;
}
return result.As<v8::Number>()->Value();
}
int64_t TranslateAgent::ExecuteScriptAndGetIntegerResult(
const std::string& script) {
WebLocalFrame* main_frame = render_frame()->GetWebFrame();
if (!main_frame)
return 0;
v8::HandleScope handle_scope(main_frame->GetAgentGroupScheduler()->Isolate());
WebScriptSource source = WebScriptSource(WebString::FromASCII(script));
v8::Local<v8::Value> result =
main_frame->ExecuteScriptInIsolatedWorldAndReturnValue(
world_id_, source, blink::BackForwardCacheAware::kAllow);
if (result.IsEmpty() || !result->IsNumber()) {
return 0;
}
return result.As<v8::Integer>()->Value();
}
// mojom::TranslateAgent implementations.
void TranslateAgent::TranslateFrame(const std::string& translate_script,
const std::string& source_lang,
const std::string& target_lang,
TranslateFrameCallback callback) {
WebLocalFrame* main_frame = render_frame()->GetWebFrame();
if (!main_frame) {
// Cancelled.
std::move(callback).Run(true, source_lang, target_lang,
TranslateErrors::NONE);
return; // We navigated away, nothing to do.
}
// A similar translation is already under way, nothing to do.
if (translate_callback_pending_ && target_lang_ == target_lang) {
// This request is ignored.
std::move(callback).Run(true, source_lang, target_lang,
TranslateErrors::NONE);
return;
}
// Any pending translation is now irrelevant.
CancelPendingTranslation();
// Set our states.
translate_callback_pending_ = std::move(callback);
// If the source language is undetermined, we'll let the translate element
// detect it.
source_lang_ = (source_lang != language_detection::kUnknownLanguageCode)
? source_lang
: kAutoDetectionLanguage;
target_lang_ = target_lang;
// Set up v8 isolated world.
EnsureIsolatedWorldInitialized(world_id_);
if (!IsTranslateLibAvailable()) {
// Evaluate the script to add the translation related method to the global
// context of the page.
ExecuteScript(translate_script);
DCHECK(IsTranslateLibAvailable());
}
TranslatePageImpl(0);
}
void TranslateAgent::RevertTranslation() {
if (!IsTranslateLibAvailable()) {
DUMP_WILL_BE_NOTREACHED();
return;
}
CancelPendingTranslation();
ExecuteScript("cr.googleTranslate.revert()");
}
////////////////////////////////////////////////////////////////////////////////
// TranslateAgent, private:
void TranslateAgent::CheckTranslateStatus() {
// First check if there was an error.
if (HasTranslationFailed()) {
NotifyBrowserTranslationFailed(
static_cast<translate::TranslateErrors>(GetErrorCode()));
return; // There was an error.
}
if (HasTranslationFinished()) {
std::string actual_source_lang;
// Translation was successfull, if it was auto, retrieve the source
// language the Translate Element detected.
if (source_lang_ == kAutoDetectionLanguage) {
actual_source_lang = GetPageSourceLanguage();
if (actual_source_lang.empty()) {
NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE);
return;
} else if (actual_source_lang == target_lang_) {
NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES);
return;
}
} else {
actual_source_lang = source_lang_;
}
if (!translate_callback_pending_) {
NOTREACHED();
}
// Check JavaScript performance counters for UMA reports.
ReportTimeToTranslate(
ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime"));
ReportTranslatedLanguageDetectionContentLength(page_contents_length_);
// Notify the browser we are done.
std::move(translate_callback_pending_)
.Run(false, actual_source_lang, target_lang_, TranslateErrors::NONE);
return;
}
// The translation is still pending, check again later.
translate_task_runner_->PostDelayedTask(
FROM_HERE,
base::BindOnce(&TranslateAgent::CheckTranslateStatus,
weak_method_factory_.GetWeakPtr()),
AdjustDelay(kTranslateStatusCheckDelayMs));
}
void TranslateAgent::TranslatePageImpl(int count) {
DCHECK_LT(count, kMaxTranslateInitCheckAttempts);
if (!IsTranslateLibReady()) {
// There was an error during initialization of library.
TranslateErrors error =
static_cast<translate::TranslateErrors>(GetErrorCode());
if (error != TranslateErrors::NONE) {
NotifyBrowserTranslationFailed(error);
return;
}
// The library is not ready, try again later, unless we have tried several
// times unsuccessfully already.
if (++count >= kMaxTranslateInitCheckAttempts) {
NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_TIMEOUT);
return;
}
translate_task_runner_->PostDelayedTask(
FROM_HERE,
base::BindOnce(&TranslateAgent::TranslatePageImpl,
weak_method_factory_.GetWeakPtr(), count),
AdjustDelay(count * kTranslateInitCheckDelayMs));
return;
}
// The library is loaded, and ready for translation now.
// Check JavaScript performance counters for UMA reports.
ReportTimeToBeReady(
ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime"));
ReportTimeToLoad(
ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime"));
if (!StartTranslation()) {
CheckTranslateStatus();
return;
}
// Check the status of the translation.
translate_task_runner_->PostDelayedTask(
FROM_HERE,
base::BindOnce(&TranslateAgent::CheckTranslateStatus,
weak_method_factory_.GetWeakPtr()),
AdjustDelay(kTranslateStatusCheckDelayMs));
}
void TranslateAgent::NotifyBrowserTranslationFailed(TranslateErrors error) {
DCHECK(translate_callback_pending_);
// Notify the browser there was an error.
std::move(translate_callback_pending_)
.Run(false, source_lang_, target_lang_, error);
}
const mojo::Remote<mojom::ContentTranslateDriver>&
TranslateAgent::GetTranslateHandler() {
if (translate_handler_) {
if (translate_handler_.is_connected()) {
return translate_handler_;
}
// The translate handler can become unbound or disconnected in testing
// so this catches that case and reconnects so `this` can connect to
// the driver in the browser.
translate_handler_.reset();
}
render_frame()->GetBrowserInterfaceBroker().GetInterface(
translate_handler_.BindNewPipeAndPassReceiver());
return translate_handler_;
}
void TranslateAgent::ResetPage() {
last_details_ = {};
receiver_.reset();
translate_callback_pending_.Reset();
CancelPendingTranslation();
}
void TranslateAgent::OnDestruct() {
delete this;
}
/* static */
std::string TranslateAgent::BuildTranslationScript(
const std::string& source_lang,
const std::string& target_lang) {
return "cr.googleTranslate.translate(" +
base::GetQuotedJSONString(source_lang) + "," +
base::GetQuotedJSONString(target_lang) + ")";
}
} // namespace translate