blob: 23512f50a3cc42ab54852c78f8a6507ace57d8c2 [file] [log] [blame]
Marlon Facey3cd673a152024-07-26 16:16:131// Copyright 2024 The Chromium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/url_deduplication/url_deduplication_helper.h"
6
7#include <memory>
8#include <vector>
9
Roman Arorad15fad0e62024-08-12 19:00:0310#include "base/strings/strcat.h"
Marlon Facey3cd673a152024-07-26 16:16:1311#include "components/url_deduplication/deduplication_strategy.h"
12#include "components/url_deduplication/url_strip_handler.h"
13#include "testing/gmock/include/gmock/gmock.h"
14#include "testing/gtest/include/gtest/gtest.h"
15#include "url/gurl.h"
16
17namespace url_deduplication {
18
19class MockURLStripHandler : public URLStripHandler {
20 public:
21 MockURLStripHandler() = default;
22 MockURLStripHandler(const MockURLStripHandler&) = delete;
23 MockURLStripHandler& operator=(const MockURLStripHandler&) = delete;
24 ~MockURLStripHandler() override = default;
25
26 MOCK_METHOD1(StripExtraParams, GURL(GURL url));
27};
28
29class URLDeduplicationHelperTest : public ::testing::Test {
30 public:
31 URLDeduplicationHelperTest() = default;
32
33 void InitHelper(std::vector<std::unique_ptr<URLStripHandler>> handlers,
34 DeduplicationStrategy strategy) {
35 helper_ =
36 std::make_unique<URLDeduplicationHelper>(std::move(handlers), strategy);
37 }
38
39 URLDeduplicationHelper* Helper() { return helper_.get(); }
40
41 private:
42 std::unique_ptr<URLDeduplicationHelper> helper_;
43};
44
// Placeholder page title passed by the tests that do not set `include_title`.
// Kept as a constexpr character array (like the title constant used in
// DeduplicateByDomainAndTitle) so it is constant-initialized and trivially
// destructible, which a namespace-scope std::string is not.
constexpr char kSamplePageTitle[] = "Sample page title";
46
Marlon Facey3cd673a152024-07-26 16:16:1347TEST_F(URLDeduplicationHelperTest, StripURL) {
48 GURL full_url = GURL(
49 "https://www.foopayment.com:123?ref=foo"
50 "#heading=h.xaresuk9ir9a&password=test1&username=test2?q=test");
51 DeduplicationStrategy strategy;
52 strategy.excluded_prefixes = {"www."};
53 strategy.update_scheme = true;
54 strategy.clear_username = true;
55 strategy.clear_password = true;
56 strategy.clear_query = true;
57 strategy.clear_ref = true;
58 strategy.clear_port = true;
59 InitHelper({}, strategy);
Roman Arorad15fad0e62024-08-12 19:00:0360 std::string stripped_url =
61 Helper()->ComputeURLDeduplicationKey(full_url, kSamplePageTitle);
Marlon Facey3cd673a152024-07-26 16:16:1362 ASSERT_EQ("http://foopayment.com/", stripped_url);
63}
64
65TEST_F(URLDeduplicationHelperTest, StripURLWithHandlers) {
66 GURL full_url =
67 GURL("https://www.google.com/search#heading=h.xaresuk9ir9a?q=test");
68 DeduplicationStrategy strategy;
69 auto handler1 = std::make_unique<MockURLStripHandler>();
70 auto handler2 = std::make_unique<MockURLStripHandler>();
71 EXPECT_CALL(*handler1, StripExtraParams(testing::_))
72 .Times(1)
73 .WillOnce(testing::Invoke(
74 [](GURL url) { return GURL("http://google.com/search"); }));
75 EXPECT_CALL(*handler2, StripExtraParams(testing::_)).Times(0);
76 std::vector<std::unique_ptr<URLStripHandler>> handlers;
77 handlers.push_back(std::move(handler1));
78 handlers.push_back(std::move(handler2));
79 InitHelper(std::move(handlers), strategy);
Roman Arorad15fad0e62024-08-12 19:00:0380 std::string stripped_url =
81 Helper()->ComputeURLDeduplicationKey(full_url, kSamplePageTitle);
Marlon Facey3cd673a152024-07-26 16:16:1382 ASSERT_EQ("http://google.com/search", stripped_url);
83}
84
Roman Arorad15fad0e62024-08-12 19:00:0385TEST_F(URLDeduplicationHelperTest, DeduplicateByDomainAndTitle) {
86 DeduplicationStrategy strategy;
87 strategy.clear_path = true;
88 strategy.include_title = true;
89 InitHelper({}, strategy);
90
91 constexpr char kSampleCalendarPageTitle[] =
92 "Google.com - Calendar - Week of Januaray 5, 2024";
93 constexpr char kSampleBaseCalendarUrl[] = "https://calendar.google.com/";
94 const std::string expected_dedup_url_key =
95 base::StrCat({kSampleBaseCalendarUrl, "#", kSampleCalendarPageTitle});
96 EXPECT_EQ(expected_dedup_url_key,
97 Helper()->ComputeURLDeduplicationKey(
98 GURL(base::StrCat({kSampleBaseCalendarUrl, "calendar/u/0/r"})),
99 kSampleCalendarPageTitle));
100 EXPECT_EQ(expected_dedup_url_key,
101 Helper()->ComputeURLDeduplicationKey(
102 GURL(base::StrCat(
103 {kSampleBaseCalendarUrl, "calendar/u/0/r/week/2024/1/05"})),
104 kSampleCalendarPageTitle));
105}
106
Marlon Facey644dfa22025-04-21 20:55:26107TEST_F(URLDeduplicationHelperTest, StripPrefix) {
108 GURL url_1 = GURL("https://accounts.google.com");
109 GURL url_2 = GURL("https://myaccount.google.com");
110 GURL url_3 = GURL("https://login.corp.google.com");
111 DeduplicationStrategy strategy;
112 strategy.excluded_prefixes = {"www.", "accounts.", "myaccount.",
113 "login.corp."};
114 InitHelper({}, strategy);
115 std::string stripped_url_1 =
116 Helper()->ComputeURLDeduplicationKey(url_1, kSamplePageTitle);
117 std::string stripped_url_2 =
118 Helper()->ComputeURLDeduplicationKey(url_2, kSamplePageTitle);
119 std::string stripped_url_3 =
120 Helper()->ComputeURLDeduplicationKey(url_3, kSamplePageTitle);
121 ASSERT_EQ(stripped_url_1, stripped_url_2);
122 ASSERT_EQ(stripped_url_1, stripped_url_3);
123}
124
Marlon Facey3cd673a152024-07-26 16:16:13125} // namespace url_deduplication