blob: 24ec66ebd9f0bfd86c5e1b32153eea304050d15d [file] [log] [blame]
Avi Drissman8ba1bad2022-09-13 19:22:361// Copyright 2018 The Chromium Authors
Yann Dagoe65b7ee2022-01-04 19:01:352// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef COMPONENTS_URL_MATCHER_URL_UTIL_H_
6#define COMPONENTS_URL_MATCHER_URL_UTIL_H_
7
8#include "base/memory/scoped_refptr.h"
9#include "base/values.h"
10#include "components/url_matcher/url_matcher.h"
11#include "components/url_matcher/url_matcher_export.h"
12
13class GURL;
14
15namespace url_matcher {
16namespace util {
17
Emmanuel Arias Soto3c2bbe32024-12-13 10:44:2118// Maximum number of filters allowed. Filters beyond this limit are ignored.
// Used as the default value of the `max_filters` argument of
// AddFiltersWithLimit() and AddAllowFiltersWithLimit() declared in this file.
19inline constexpr size_t kMaxFiltersAllowed = 1000;
20
Yann Dagoe65b7ee2022-01-04 19:01:3521// Converts `value`, which is expected to hold a list of strings, into the
22// vector `out`. Returns true if successful.
// NOTE(review): the contents of `out` when false is returned are not specified
// here -- confirm against the implementation before relying on them.
23bool GetAsStringVector(const base::Value* value, std::vector<std::string>* out);
24
25// Normalizes a URL for matching purposes. `url` is taken by const reference
// and the normalized URL is returned as a new GURL.
26URL_MATCHER_EXPORT GURL Normalize(const GURL& url);
27
28// Helper function to extract the underlying URL wrapped by services such as
29// Google AMP or Google Translate. Returns an empty GURL if `url` doesn't match
30// a known format.
// See also GetGoogleAmpViewerEmbeddedURL(), which is documented as handling
// only URLs wrapped by the Google AMP viewer.
31URL_MATCHER_EXPORT GURL GetEmbeddedURL(const GURL& url);
32
Nan Lin6d07b0e2024-11-08 16:23:0833// Helper function to extract the underlying URL wrapped by the Google AMP
34// viewer. Returns an empty GURL if `url` doesn't match a known format.
// NOTE(review): presumably a narrower variant of GetEmbeddedURL() -- confirm
// against the implementation.
35URL_MATCHER_EXPORT GURL GetGoogleAmpViewerEmbeddedURL(const GURL& url);
36
Yann Dagoe65b7ee2022-01-04 19:01:3537// Utility struct that holds a URL filter broken down into its components.
// The type is move-only: copy construction and copy assignment are deleted.
38struct URL_MATCHER_EXPORT FilterComponents {
39 FilterComponents();
40 FilterComponents(const FilterComponents&) = delete;
41 FilterComponents(FilterComponents&&);
42 FilterComponents& operator=(const FilterComponents&) = delete;
43 FilterComponents& operator=(FilterComponents&&) = default;
44
45 ~FilterComponents();
46
47 // Returns true if `this` represents the "*" filter.
48 bool IsWildcard() const;
 // Individual parts of the filter.
 // NOTE(review): presumably these hold the values produced by
 // FilterToComponents() for the same filter string -- confirm at call sites.
49 std::string scheme;
50 std::string host;
 // Defaults to 0.
51 uint16_t port = 0;
52 std::string path;
53 std::string query;
54 // Number of conditions that a URL needs to match in order to be considered a
55 // match for this filter.
56 int number_of_url_matching_conditions = 0;
 // Defaults to true.
57 bool match_subdomains = true;
 // Defaults to true.
 // NOTE(review): presumably true for an allow-list filter and false for a
 // block-list filter, as for the `allow` argument of CreateConditionSet() --
 // confirm.
58 bool allow = true;
59};
60
61// Creates a condition set that can be used with the `url_matcher`. `id` needs
62// to be a unique number that will be returned by the `url_matcher` if the URL
63// matches that condition set. `allow` indicates if it is an allow-list (true)
64// or block-list (false) filter.
// NOTE(review): `scheme`, `host`, `match_subdomains`, `port`, `path` and
// `query` mirror the outputs of FilterToComponents(); presumably they are
// expected in the form that function produces -- confirm.
65URL_MATCHER_EXPORT scoped_refptr<url_matcher::URLMatcherConditionSet>
66CreateConditionSet(url_matcher::URLMatcher* url_matcher,
Peter Kasting78549f32022-05-31 18:20:2067 base::MatcherStringPattern::ID id,
Yann Dagoe65b7ee2022-01-04 19:01:3568 const std::string& scheme,
69 const std::string& host,
70 bool match_subdomains,
71 uint16_t port,
72 const std::string& path,
73 const std::string& query,
74 bool allow);
75
76// Splits a URL filter into its components. A GURL isn't used because these
77// can be invalid URLs, e.g. "google.com".
78// Returns false if the URL couldn't be parsed. If false is returned, the
79// values of the output parameters are undefined.
80// The `filter` should have the format described at
81// http://www.chromium.org/administrators/url-blocklist-filter-format and
82// accepts wildcards. The `host` is preprocessed so it can be passed to
83// URLMatcher for the appropriate condition. The optional username and password
84// are ignored. `match_subdomains` specifies whether the filter should include
85// subdomains of the hostname (if it is one). `port` is 0 if none is explicitly
86// defined. `path` does not include query parameters. `query` contains the query
87// parameters ('?' not included). All arguments are mandatory.
88URL_MATCHER_EXPORT bool FilterToComponents(const std::string& filter,
89 std::string* scheme,
90 std::string* host,
91 bool* match_subdomains,
92 uint16_t* port,
93 std::string* path,
94 std::string* query);
95
Emmanuel Arias Soto3c2bbe32024-12-13 10:44:2196// Adds a limited number of URL filters `patterns` to a URLMatcher
97// `matcher`. The `max_filters` parameter specifies the maximum number of
98// filters added; it defaults to kMaxFiltersAllowed.
99//
100// If `allow` is true, the filters will allow matching URLs; otherwise, they
101// block them. The `id` parameter provides a pointer to the ID assigned to the
102// filters, incremented for each filter added.
103//
104// `patterns` should be a list of URL patterns (see format description at
105// http://www.chromium.org/administrators/url-blocklist-filter-format).
106// This overload takes the patterns as a base::Value::List.
107// An optional map to store the generated FilterComponents can be provided
108// via `filters`; it defaults to nullptr.
109URL_MATCHER_EXPORT void AddFiltersWithLimit(
Yann Dagoe65b7ee2022-01-04 19:01:35110 url_matcher::URLMatcher* matcher,
111 bool allow,
Peter Kasting78549f32022-05-31 18:20:20112 base::MatcherStringPattern::ID* id,
Roland Bock632be632022-07-15 18:05:36113 const base::Value::List& patterns,
Emmanuel Arias Soto3c2bbe32024-12-13 10:44:21114 std::map<base::MatcherStringPattern::ID, FilterComponents>* filters =
115 nullptr,
116 size_t max_filters = kMaxFiltersAllowed);
Yann Dagoe65b7ee2022-01-04 19:01:35117
Emmanuel Arias Soto3c2bbe32024-12-13 10:44:21118// Adds a limited number of URL filters `patterns` to a URLMatcher
119// `matcher`. The `max_filters` parameter specifies the maximum number of
120// filters added; it defaults to kMaxFiltersAllowed.
121//
122// If `allow` is true, the filters will allow matching URLs; otherwise, they
123// block them. The `id` parameter provides a pointer to the ID assigned to the
124// filters, incremented for each filter added.
125//
126// `patterns` should be a list of URL patterns (see format description at
127// http://www.chromium.org/administrators/url-blocklist-filter-format).
128// This overload takes the patterns as a std::vector<std::string>.
129// An optional map to store the generated FilterComponents can be provided
130// via `filters`; it defaults to nullptr.
131URL_MATCHER_EXPORT void AddFiltersWithLimit(
Yann Dagoe65b7ee2022-01-04 19:01:35132 url_matcher::URLMatcher* matcher,
133 bool allow,
Peter Kasting78549f32022-05-31 18:20:20134 base::MatcherStringPattern::ID* id,
Yann Dagoe65b7ee2022-01-04 19:01:35135 const std::vector<std::string>& patterns,
Emmanuel Arias Soto3c2bbe32024-12-13 10:44:21136 std::map<base::MatcherStringPattern::ID, FilterComponents>* filters =
137 nullptr,
138 size_t max_filters = kMaxFiltersAllowed);
Yann Dagoe65b7ee2022-01-04 19:01:35139
// Adds URL filters `patterns`, given as a base::Value::List, to the URLMatcher
// `matcher`. `max_filters` defaults to kMaxFiltersAllowed.
// NOTE(review): judging by the name, presumably adds at most `max_filters`
// allow-list filters, like AddFiltersWithLimit() with `allow` set to true --
// confirm against the implementation.
Emmanuel Arias Soto3c2bbe32024-12-13 10:44:21140URL_MATCHER_EXPORT void AddAllowFiltersWithLimit(
Yann Dagoe65b7ee2022-01-04 19:01:35141 url_matcher::URLMatcher* matcher,
Emmanuel Arias Soto3c2bbe32024-12-13 10:44:21142 const base::Value::List& patterns,
143 size_t max_filters = kMaxFiltersAllowed);
144
// Adds URL filters `patterns`, given as a std::vector<std::string>, to the
// URLMatcher `matcher`. `max_filters` defaults to kMaxFiltersAllowed.
// NOTE(review): judging by the name, presumably adds at most `max_filters`
// allow-list filters, like AddFiltersWithLimit() with `allow` set to true --
// confirm against the implementation.
145URL_MATCHER_EXPORT void AddAllowFiltersWithLimit(
146 url_matcher::URLMatcher* matcher,
147 const std::vector<std::string>& patterns,
148 size_t max_filters = kMaxFiltersAllowed);
Yann Dagoe65b7ee2022-01-04 19:01:35149
150} // namespace util
151} // namespace url_matcher
152
153#endif // COMPONENTS_URL_MATCHER_URL_UTIL_H_