blob: 89a33dfcb62717040074736350db29fdf50c359a [file] [log] [blame]
Avi Drissman8ba1bad2022-09-13 19:22:361// Copyright 2018 The Chromium Authors
Yann Dagoe65b7ee2022-01-04 19:01:352// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef COMPONENTS_URL_MATCHER_URL_UTIL_H_
6#define COMPONENTS_URL_MATCHER_URL_UTIL_H_
7
8#include "base/memory/scoped_refptr.h"
9#include "base/values.h"
10#include "components/url_matcher/url_matcher.h"
11#include "components/url_matcher/url_matcher_export.h"
12
13class GURL;
14
15namespace url_matcher {
16namespace util {
17
18// Converts `value`, expected to hold a list of strings, into a vector of
19// strings written to `out`. Returns true if successful.
// NOTE(review): handling of a null or non-list `value` is not visible in this
// header; confirm against the implementation.
20bool GetAsStringVector(const base::Value* value, std::vector<std::string>* out);
21
22// Normalizes a URL for matching purposes. Takes `url` by const reference and
// returns the normalized result as a new GURL.
// NOTE(review): the specific normalization steps are not visible in this
// header; see the implementation.
23URL_MATCHER_EXPORT GURL Normalize(const GURL& url);
24
25// Extracts the underlying URL when `url` is a wrapper produced by services
26// such as Google AMP or Google Translate. Returns an empty GURL if `url`
27// doesn't match a known wrapper format.
28URL_MATCHER_EXPORT GURL GetEmbeddedURL(const GURL& url);
29
30// Utility struct that holds a URL filter broken down into its components.
// Move-only: the copy constructor and copy assignment are deleted, while the
// move constructor and move assignment remain available.
31struct URL_MATCHER_EXPORT FilterComponents {
32 FilterComponents();
33 FilterComponents(const FilterComponents&) = delete;
34 FilterComponents(FilterComponents&&);
35 FilterComponents& operator=(const FilterComponents&) = delete;
36 FilterComponents& operator=(FilterComponents&&) = default;
37
38 ~FilterComponents();
39
40 // Returns true if `this` represents the "*" filter.
41 bool IsWildcard() const;
 // Individual components of the filter.
 // NOTE(review): presumably these mirror the outputs of FilterToComponents()
 // (e.g. `port` staying 0 when the filter names no explicit port) - confirm
 // against the code that populates this struct.
42 std::string scheme;
43 std::string host;
44 uint16_t port = 0;
45 std::string path;
46 std::string query;
47 // Number of conditions that a URL needs to match in order to be considered
48 // a match for this filter.
49 int number_of_url_matching_conditions = 0;
 // NOTE(review): presumably whether subdomains of `host` are also matched;
 // defaults to true.
50 bool match_subdomains = true;
 // NOTE(review): presumably true for an allow-list filter and false for a
 // block-list filter; defaults to true.
51 bool allow = true;
52};
53
54// Creates a condition set that can be used with the `url_matcher`. `id` needs
55// to be a unique number that will be returned by the `url_matcher` if the URL
56// matches that condition set. `allow` indicates if it is an allow-list (true)
57// or block-list (false) filter.
// NOTE(review): `scheme`, `host`, `match_subdomains`, `port`, `path` and
// `query` presumably carry the filter components in the form produced by
// FilterToComponents() - confirm against the implementation.
58URL_MATCHER_EXPORT scoped_refptr<url_matcher::URLMatcherConditionSet>
59CreateConditionSet(url_matcher::URLMatcher* url_matcher,
Peter Kasting78549f32022-05-31 18:20:2060 base::MatcherStringPattern::ID id,
Yann Dagoe65b7ee2022-01-04 19:01:3561 const std::string& scheme,
62 const std::string& host,
63 bool match_subdomains,
64 uint16_t port,
65 const std::string& path,
66 const std::string& query,
67 bool allow);
68
69// Splits a URL filter into its components. A GURL isn't used because these
70// can be invalid URLs, e.g. "google.com".
71// Returns false if the filter couldn't be parsed. In case false is returned,
72// the values of the output parameters are undefined.
73// The `filter` should have the format described at
74// http://www.chromium.org/administrators/url-blocklist-filter-format and
75// accepts wildcards. The `host` is preprocessed so it can be passed to
76// URLMatcher for the appropriate condition. The optional username and password
77// are ignored. `match_subdomains` specifies whether the filter should include
78// subdomains of the hostname (if it is one). `port` is 0 if none is explicitly
79// defined. `path` does not include query parameters. `query` contains the query
80// parameters ('?' not included). All arguments are mandatory.
// Every parameter other than `filter` is an output parameter.
81URL_MATCHER_EXPORT bool FilterToComponents(const std::string& filter,
82 std::string* scheme,
83 std::string* host,
84 bool* match_subdomains,
85 uint16_t* port,
86 std::string* path,
87 std::string* query);
88
89// Adds the filters in `patterns` to `matcher` as a ConditionSet::Vector.
90// `matcher` is the URLMatcher where filters are added.
91// `allow` specifies whether the filter accepts or blocks the matched urls.
92// `id` is the id given to the filter being added.
93// `patterns` is a list of url schemes following the format described at
94// http://www.chromium.org/administrators/url-blocklist-filter-format and
95// accepts wildcards.
96// `filters` is an optional map of id to FilterComponents where the generated
97// FilterComponents will be added.
98URL_MATCHER_EXPORT void AddFilters(
99 url_matcher::URLMatcher* matcher,
100 bool allow,
Peter Kasting78549f32022-05-31 18:20:20101 base::MatcherStringPattern::ID* id,
Roland Bock632be632022-07-15 18:05:36102 const base::Value::List& patterns,
Peter Kasting78549f32022-05-31 18:20:20