blob: 27c60c3caab847527331ad7f96b83569a4fb9a44 [file] [log] [blame]
Scott Violet26f9aa62023-10-25 23:29:591// Copyright 2023 The Chromium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_CONTENT_EXTRACTION_INNER_TEXT_H_
6#define CHROME_BROWSER_CONTENT_EXTRACTION_INNER_TEXT_H_
7
8#include <memory>
9#include <string>
10
11#include "base/functional/callback_forward.h"
12#include "third_party/abseil-cpp/absl/types/optional.h"
13#include "third_party/blink/public/mojom/content_extraction/inner_text.mojom-forward.h"
14
// Forward declaration: this header only refers to RenderFrameHost by
// reference, so the full definition is not needed here.
namespace content {
class RenderFrameHost;
}  // namespace content
18
19namespace content_extraction {
20
21struct InnerTextResult {
22 // The combined inner-text. See comments in GetInnerText() for details.
23 std::string inner_text;
24
25 // Offset of the supplied node in `inner_text`. Only set if a node-id is
26 // supplied to GetInnerText() and a matching node was found.
27 absl::optional<unsigned> node_offset;
28};
29
30using InnerTextCallback =
31 base::OnceCallback<void(std::unique_ptr<InnerTextResult> result)>;
32
33// Requests the inner-text for the specified `host` as well as all local
34// same-origin iframes. The returned inner-text contains the combined inner-text
35// of all suitable iframes. Only the inner-text of the first body or frameset
36// is used. The text is combined as the iframes are encountered. For example,
37// the following structure:
38// <body>
39// A <iframe src="a.html></iframe>
40// B <iframe src="b.html></iframe>
41// </body>
42// results in the string "A <a-inner-text> B <b-inner-test>" where a-inner-text
43// and b-inner-text are replaced with the inner-text of a.html and b.html.
44//
45// You may also supply a DomNodeId. The callback will be called with the offset
46// of the start of the node in the text.
47//
48// If querying the inner-text fails (renderer crash, or page shutdown during
49// request) then null is supplied to the callback.
50//
51// NOTE: This function services the request as soon as called, it does not wait
52// for the page to finish loading.
53void GetInnerText(content::RenderFrameHost& host,
54 absl::optional<int> node_id,
55 InnerTextCallback callback);
56
57// Exposed for testing.
58namespace internal {
59
60// Returns true if `frame` frame valid.
61bool IsInnerTextFrameValid(const blink::mojom::InnerTextFramePtr& frame);
62
63// Converts `frame` into an InnerTextResult.
64std::unique_ptr<InnerTextResult> CreateInnerTextResult(
65 const blink::mojom::InnerTextFrame& frame);
66
67} // namespace internal
68} // namespace content_extraction
69
70#endif // CHROME_BROWSER_CONTENT_EXTRACTION_INNER_TEXT_H_