// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
| 4 | |
| 5 | #ifndef COMPONENTS_URL_MATCHER_URL_UTIL_H_ |
| 6 | #define COMPONENTS_URL_MATCHER_URL_UTIL_H_ |
| 7 | |
#include <stddef.h>
#include <stdint.h>

#include <map>
#include <string>
#include <vector>

#include "base/memory/scoped_refptr.h"
#include "base/values.h"
#include "components/url_matcher/url_matcher.h"
#include "components/url_matcher/url_matcher_export.h"
| 12 | |
// Forward declaration: this header only names GURL in function declarations
// (parameter and return types), so the full definition is not needed here.
class GURL;
| 14 | |
| 15 | namespace url_matcher { |
| 16 | namespace util { |
| 17 | |
// Maximum number of filters allowed, and the default value of the
// `max_filters` arguments declared in this header. Filters beyond this limit
// are ignored.
inline constexpr size_t kMaxFiltersAllowed = 1000;
| 20 | |
Yann Dago | e65b7ee | 2022-01-04 19:01:35 | [diff] [blame] | 21 | // Converts a ValueList `value` of strings into a vector. Returns true if |
| 22 | // successful. |
| 23 | bool GetAsStringVector(const base::Value* value, std::vector<std::string>* out); |
| 24 | |
| 25 | // Normalizes a URL for matching purposes. |
| 26 | URL_MATCHER_EXPORT GURL Normalize(const GURL& url); |
| 27 | |
| 28 | // Helper function to extract the underlying URL wrapped by services such as |
| 29 | // Google AMP or Google Translate. Returns an empty GURL if `url` doesn't match |
| 30 | // a known format. |
| 31 | URL_MATCHER_EXPORT GURL GetEmbeddedURL(const GURL& url); |
| 32 | |
Nan Lin | 6d07b0e | 2024-11-08 16:23:08 | [diff] [blame] | 33 | // Helper function to extract the underlying URL wrapped by Google AMP viewer. |
| 34 | // Returns an empty GURL if `url` doesn't match a known format. |
| 35 | URL_MATCHER_EXPORT GURL GetGoogleAmpViewerEmbeddedURL(const GURL& url); |
| 36 | |
Yann Dago | e65b7ee | 2022-01-04 19:01:35 | [diff] [blame] | 37 | // Utility struct used to represent a url filter scheme into its components. |
| 38 | struct URL_MATCHER_EXPORT FilterComponents { |
| 39 | FilterComponents(); |
| 40 | FilterComponents(const FilterComponents&) = delete; |
| 41 | FilterComponents(FilterComponents&&); |
| 42 | FilterComponents& operator=(const FilterComponents&) = delete; |
| 43 | FilterComponents& operator=(FilterComponents&&) = default; |
| 44 | |
| 45 | ~FilterComponents(); |
| 46 | |
| 47 | // Returns true if `this` represents the "*" filter. |
| 48 | bool IsWildcard() const; |
| 49 | std::string scheme; |
| 50 | std::string host; |
| 51 | uint16_t port = 0; |
| 52 | std::string path; |
| 53 | std::string query; |
| 54 | // Number of conditions that a url needs to match it to be considered a match |
| 55 | // for this filter. |
| 56 | int number_of_url_matching_conditions = 0; |
| 57 | bool match_subdomains = true; |
| 58 | bool allow = true; |
| 59 | }; |
| 60 | |
| 61 | // Creates a condition set that can be used with the `url_matcher`. `id` needs |
| 62 | // to be a unique number that will be returned by the `url_matcher` if the URL |
| 63 | // matches that condition set. `allow` indicates if it is an allow-list (true) |
| 64 | // or block-list (false) filter. |
| 65 | URL_MATCHER_EXPORT scoped_refptr<url_matcher::URLMatcherConditionSet> |
| 66 | CreateConditionSet(url_matcher::URLMatcher* url_matcher, |
Peter Kasting | 78549f3 | 2022-05-31 18:20:20 | [diff] [blame] | 67 | base::MatcherStringPattern::ID id, |
Yann Dago | e65b7ee | 2022-01-04 19:01:35 | [diff] [blame] | 68 | const std::string& scheme, |
| 69 | const std::string& host, |
| 70 | bool match_subdomains, |
| 71 | uint16_t port, |
| 72 | const std::string& path, |
| 73 | const std::string& query, |
| 74 | bool allow); |
| 75 | |
| 76 | // Splits a URL filter into its components. A GURL isn't used because these |
| 77 | // can be invalid URLs e.g. "google.com". |
| 78 | // Returns false if the URL couldn't be parsed. In case false is returned, |
| 79 | // the values of output parameters are undefined. |
| 80 | // The `filter` should have the format described at |
| 81 | // http://www.chromium.org/administrators/url-blocklist-filter-format and |
| 82 | // accepts wildcards. The `host` is preprocessed so it can be passed to |
| 83 | // URLMatcher for the appropriate condition. The optional username and password |
| 84 | // are ignored. `match_subdomains` specifies whether the filter should include |
| 85 | // subdomains of the hostname (if it is one.) `port` is 0 if none is explicitly |
| 86 | // defined. `path` does not include query parameters. `query` contains the query |
| 87 | // parameters ('?' not included). All arguments are mandatory. |
| 88 | URL_MATCHER_EXPORT bool FilterToComponents(const std::string& filter, |
| 89 | std::string* scheme, |
| 90 | std::string* host, |
| 91 | bool* match_subdomains, |
| 92 | uint16_t* port, |
| 93 | std::string* path, |
| 94 | std::string* query); |
| 95 | |
Emmanuel Arias Soto | 3c2bbe3 | 2024-12-13 10:44:21 | [diff] [blame] | 96 | // Adds a limited number of URL filters `patterns` to a URLMatcher |
| 97 | // `matcher`. The `max_filters` parameter specifies the maximum number of |
| 98 | // filters added. |
| 99 | // |
| 100 | // If `allow` is true, the filters will allow matching URLs; otherwise, they |
| 101 | // block them. The `id` parameter provides a pointer to the ID assigned to the |
| 102 | // filters, incremented for each filter added. |
| 103 | // |
| 104 | // `patterns` should be a list of URL patterns (see format description at |
| 105 | // http://www.chromium.org/administrators/url-blocklist-filter-format). |
| 106 | // |
| 107 | // An optional map to store the generated FilterComponents can be provided |
| 108 | // via |filters|. |
| 109 | URL_MATCHER_EXPORT void AddFiltersWithLimit( |
Yann Dago | e65b7ee | 2022-01-04 19:01:35 | [diff] [blame] | 110 | url_matcher::URLMatcher* matcher, |
| 111 | bool allow, |
Peter Kasting | 78549f3 | 2022-05-31 18:20:20 | [diff] [blame] | 112 | base::MatcherStringPattern::ID* id, |
Roland Bock | 632be63 | 2022-07-15 18:05:36 | [diff] [blame] | 113 | const base::Value::List& patterns, |
Emmanuel Arias Soto | 3c2bbe3 | 2024-12-13 10:44:21 | [diff] [blame] | 114 | std::map<base::MatcherStringPattern::ID, FilterComponents>* filters = |
| 115 | nullptr, |
| 116 | size_t max_filters = kMaxFiltersAllowed); |
Yann Dago | e65b7ee | 2022-01-04 19:01:35 | [diff] [blame] | 117 | |
Emmanuel Arias Soto | 3c2bbe3 | 2024-12-13 10:44:21 | [diff] [blame] | 118 | // Adds a limited number of URL filters `patterns` to a URLMatcher |
| 119 | // `matcher`. The `max_filters` parameter specifies the maximum number of |
| 120 | // filters added. |
| 121 | // |
| 122 | // If `allow` is true, the filters will allow matching URLs; otherwise, they |
| 123 | // block them. The `id` parameter provides a pointer to the ID assigned to the |
| 124 | // filters, incremented for each filter added. |
| 125 | // |
| 126 | // `patterns` should be a list of URL patterns (see format description at |
| 127 | // http://www.chromium.org/administrators/url-blocklist-filter-format). |
| 128 | // |
| 129 | // An optional map to store the generated FilterComponents can be provided |
| 130 | // via |filters|. |
| 131 | URL_MATCHER_EXPORT void AddFiltersWithLimit( |
Yann Dago | e65b7ee | 2022-01-04 19:01:35 | [diff] [blame] | 132 | url_matcher::URLMatcher* matcher, |
| 133 | bool allow, |
Peter Kasting | 78549f3 | 2022-05-31 18:20:20 | [diff] [blame] | 134 | base::MatcherStringPattern::ID* id, |
Yann Dago | e65b7ee | 2022-01-04 19:01:35 | [diff] [blame] | 135 | const std::vector<std::string>& patterns, |
Emmanuel Arias Soto | 3c2bbe3 | 2024-12-13 10:44:21 | [diff] [blame] | 136 | std::map<base::MatcherStringPattern::ID, FilterComponents>* filters = |
| 137 | nullptr, |
| 138 | size_t max_filters = kMaxFiltersAllowed); |
Yann Dago | e65b7ee | 2022-01-04 19:01:35 | [diff] [blame] | 139 | |
Emmanuel Arias Soto | 3c2bbe3 | 2024-12-13 10:44:21 | [diff] [blame] | 140 | URL_MATCHER_EXPORT void AddAllowFiltersWithLimit( |
Yann Dago | e65b7ee | 2022-01-04 19:01:35 | [diff] [blame] | 141 | url_matcher::URLMatcher* matcher, |
Emmanuel Arias Soto | 3c2bbe3 | 2024-12-13 10:44:21 | [diff] [blame] | 142 | const base::Value::List& patterns, |
| 143 | size_t max_filters = kMaxFiltersAllowed); |
| 144 | |
| 145 | URL_MATCHER_EXPORT void AddAllowFiltersWithLimit( |
| 146 | url_matcher::URLMatcher* matcher, |
| 147 | const std::vector<std::string>& patterns, |
| 148 | size_t max_filters = kMaxFiltersAllowed); |
Yann Dago | e65b7ee | 2022-01-04 19:01:35 | [diff] [blame] | 149 | |
| 150 | } // namespace util |
| 151 | } // namespace url_matcher |
| 152 | |
| 153 | #endif // COMPONENTS_URL_MATCHER_URL_UTIL_H_ |