blob: e63cd9c54068bb3bb24d1ea99e6de55c212d40f0 [file] [log] [blame]
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/content_extraction/inner_text.h"
#include "base/functional/bind.h"
#include "base/metrics/histogram_functions.h"
#include "content/public/browser/render_frame_host.h"
#include "mojo/public/cpp/bindings/callback_helpers.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "services/service_manager/public/cpp/interface_provider.h"
#include "third_party/blink/public/mojom/content_extraction/inner_text.mojom.h"
namespace content_extraction {
// Shorthand for a list of mojom inner-text segments.
// NOTE(review): no use of this alias is visible in this file — confirm whether
// it is still needed.
using Segments = std::vector<blink::mojom::InnerTextSegmentPtr>;
namespace {
// Computes how many bytes the concatenated text of `frame` occupies: the sum
// of the sizes of its text segments plus, recursively, those of every nested
// frame segment. Segments of any other kind contribute nothing.
size_t CalculateTotalStringSize(const blink::mojom::InnerTextFrame& frame) {
  size_t total_bytes = 0;
  for (const auto& entry : frame.segments) {
    if (entry->is_text()) {
      total_bytes += entry->get_text().size();
      continue;
    }
    if (entry->is_frame()) {
      // Nested frames are flattened into the same string, so count them too.
      total_bytes += CalculateTotalStringSize(*entry->get_frame());
    }
  }
  return total_bytes;
}
// Walks the segments of `frame` in order and accumulates them into `result`:
//  - a text segment is appended to `result.inner_text`;
//  - a node-location segment stores the current length of
//    `result.inner_text` in `result.node_offset` (if several are encountered,
//    the last one visited wins);
//  - a frame segment is descended into recursively.
// Any other segment kind is skipped, mirroring CalculateTotalStringSize(), so
// get_frame() is only ever called on a segment that actually holds a frame.
void AppendFrameSegments(const blink::mojom::InnerTextFrame& frame,
                         InnerTextResult& result) {
  for (const auto& segment : frame.segments) {
    if (segment->is_text()) {
      result.inner_text.append(segment->get_text());
    } else if (segment->is_node_location()) {
      // The offset is the position in the combined text at which the node
      // location marker was encountered.
      result.node_offset = result.inner_text.size();
    } else if (segment->is_frame()) {
      AppendFrameSegments(*segment->get_frame(), result);
    }
  }
}
// Reply handler for the InnerTextAgent::GetInnerText() request issued by
// GetInnerText().
//
// `start_time` is when the request was started and is used for the timing
// histogram. `remote_interface` is not read here; it is passed along with the
// bound callback (presumably so the connection outlives the request — confirm
// against mojo::Remote lifetime rules). `callback` receives the converted
// result, or null if `mojo_frame` fails validation.
void OnGotInnerText(base::TimeTicks start_time,
                    mojo::Remote<blink::mojom::InnerTextAgent> remote_interface,
                    InnerTextCallback callback,
                    blink::mojom::InnerTextFramePtr mojo_frame) {
  std::unique_ptr<InnerTextResult> extracted;
  const bool frame_ok = internal::IsInnerTextFrameValid(mojo_frame);
  if (frame_ok) {
    extracted = internal::CreateInnerTextResult(*mojo_frame);
    // Time and size are only recorded for responses that passed validation.
    base::UmaHistogramTimes("ContentExtraction.InnerText.Time",
                            base::TimeTicks::Now() - start_time);
    base::UmaHistogramCounts10M("ContentExtraction.InnerText.Size",
                                extracted->inner_text.size());
  }
  const bool has_result = extracted != nullptr;
  base::UmaHistogramBoolean("ContentExtraction.InnerText.ValidResults",
                            has_result);
  std::move(callback).Run(std::move(extracted));
}
} // namespace
// Requests the inner text of `host` and hands the outcome to `callback`.
//
// If `host` has no live render frame, `callback` is run immediately with
// null. Otherwise an InnerTextAgent remote is bound through the frame's
// remote interfaces and asked for the inner text; when `node_id` is set it is
// forwarded in the request params. The reply is routed to OnGotInnerText().
void GetInnerText(content::RenderFrameHost& host,
                  std::optional<int> node_id,
                  InnerTextCallback callback) {
  if (!host.IsRenderFrameLive()) {
    std::move(callback).Run(nullptr);
    return;
  }

  const base::TimeTicks start_time = base::TimeTicks::Now();

  mojo::Remote<blink::mojom::InnerTextAgent> agent;
  host.GetRemoteInterfaces()->GetInterface(agent.BindNewPipeAndPassReceiver());

  auto params = blink::mojom::InnerTextParams::New();
  if (node_id.has_value()) {
    params->node_id = node_id.value();
  }

  // Grab the proxy before `agent` is moved into the reply callback below; the
  // remote travels with the callback so it stays around until the reply runs.
  auto* const proxy = agent.get();

  // The wrapper is given a default argument of nullptr to use if the reply is
  // never run (per the helper's name).
  auto reply = mojo::WrapCallbackWithDefaultInvokeIfNotRun(
      base::BindOnce(&OnGotInnerText, start_time, std::move(agent),
                     std::move(callback)),
      nullptr);
  proxy->GetInnerText(std::move(params), std::move(reply));
}
namespace internal {
// Returns true when `frame` is non-null and every segment in it is non-null
// and is one of: text, node location, or a frame that is itself valid
// (checked recursively). Returns false otherwise.
bool IsInnerTextFrameValid(const blink::mojom::InnerTextFramePtr& frame) {
  if (!frame) {
    return false;
  }
  for (const auto& entry : frame->segments) {
    if (!entry) {
      return false;
    }
    if (entry->is_frame()) {
      // A nested frame must satisfy the same rules all the way down.
      if (!IsInnerTextFrameValid(entry->get_frame())) {
        return false;
      }
      continue;
    }
    if (!entry->is_text() && !entry->is_node_location()) {
      return false;
    }
  }
  return true;
}
// Builds an InnerTextResult from `frame` by flattening its segments (and
// those of nested frames) via AppendFrameSegments(). Always returns a
// non-null result.
std::unique_ptr<InnerTextResult> CreateInnerTextResult(
    const blink::mojom::InnerTextFrame& frame) {
  auto built = std::make_unique<InnerTextResult>();
  // Size the string up front so appending the segments does not need to
  // repeatedly grow it.
  const size_t needed_bytes = CalculateTotalStringSize(frame);
  built->inner_text.reserve(needed_bytes);
  AppendFrameSegments(frame, *built);
  return built;
}
} // namespace internal
} // namespace content_extraction