blob: 00909eca4c98e3b26d1cc3bccb9b0b711ec619fa [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Arthur Sonzognib948e672024-07-31 08:29:045#ifdef UNSAFE_BUFFERS_BUILD
6// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
7#pragma allow_unsafe_buffers
8#endif
9
Min Qin75bee1b2022-02-05 03:57:3410#include "components/segmentation_platform/internal/segmentation_ukm_helper.h"
11
12#include "base/bit_cast.h"
13#include "base/metrics/field_trial_params.h"
Min Qin7a9f38902023-05-15 16:27:4614#include "base/rand_util.h"
Min Qin75bee1b2022-02-05 03:57:3415#include "base/strings/string_number_conversions.h"
16#include "base/strings/string_split.h"
Min Qincae984cb2022-05-20 03:08:3417#include "base/time/clock.h"
Salvador Guerrero5783fd62023-05-13 00:28:3218#include "base/time/time.h"
Min Qincae984cb2022-05-20 03:08:3419#include "components/segmentation_platform/internal/constants.h"
Min Qin642ab222022-05-19 21:54:5320#include "components/segmentation_platform/internal/selection/segmentation_result_prefs.h"
Min Qin75bee1b2022-02-05 03:57:3421#include "components/segmentation_platform/internal/stats.h"
22#include "components/segmentation_platform/public/config.h"
23#include "components/segmentation_platform/public/features.h"
Min Qincae984cb2022-05-20 03:08:3424#include "components/segmentation_platform/public/local_state_helper.h"
Min Qin75bee1b2022-02-05 03:57:3425#include "services/metrics/public/cpp/ukm_builders.h"
26#include "services/metrics/public/cpp/ukm_recorder.h"
ssidad353932024-08-27 22:42:2527#include "services/metrics/public/cpp/ukm_source_id.h"
Min Qin75bee1b2022-02-05 03:57:3428
// Forms a call through the pointer-to-member-function `func` on the object
// `obj`. The argument list is written by the caller right after the macro.
#define CALL_MEMBER_FN(obj, func) ((obj).*(func))
// Element count of the built-in array `x`. Only meaningful for real arrays;
// the expression still compiles for a pointer but yields an unrelated value.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
31
Min Qin1eeb6432023-02-23 23:41:5932using segmentation_platform::SegmentationUkmHelper;
ssid8386fc72022-05-21 00:34:1733using segmentation_platform::proto::SegmentId;
Min Qin75bee1b2022-02-05 03:57:3434using ukm::builders::Segmentation_ModelExecution;
35
ssid7a2fe7012023-07-05 23:16:0936namespace segmentation_platform {
37
Min Qin75bee1b2022-02-05 03:57:3438namespace {
39using UkmMemberFn =
40 Segmentation_ModelExecution& (Segmentation_ModelExecution::*)(int64_t);
41
Xing Liu282dd0d2022-02-24 00:20:3942const UkmMemberFn kSegmentationUkmInputMethods[] = {
Min Qin75bee1b2022-02-05 03:57:3443 &Segmentation_ModelExecution::SetInput0,
44 &Segmentation_ModelExecution::SetInput1,
45 &Segmentation_ModelExecution::SetInput2,
46 &Segmentation_ModelExecution::SetInput3,
47 &Segmentation_ModelExecution::SetInput4,
48 &Segmentation_ModelExecution::SetInput5,
49 &Segmentation_ModelExecution::SetInput6,
50 &Segmentation_ModelExecution::SetInput7,
51 &Segmentation_ModelExecution::SetInput8,
52 &Segmentation_ModelExecution::SetInput9,
53 &Segmentation_ModelExecution::SetInput10,
54 &Segmentation_ModelExecution::SetInput11,
55 &Segmentation_ModelExecution::SetInput12,
56 &Segmentation_ModelExecution::SetInput13,
57 &Segmentation_ModelExecution::SetInput14,
58 &Segmentation_ModelExecution::SetInput15,
59 &Segmentation_ModelExecution::SetInput16,
60 &Segmentation_ModelExecution::SetInput17,
61 &Segmentation_ModelExecution::SetInput18,
62 &Segmentation_ModelExecution::SetInput19,
63 &Segmentation_ModelExecution::SetInput20,
64 &Segmentation_ModelExecution::SetInput21,
65 &Segmentation_ModelExecution::SetInput22,
66 &Segmentation_ModelExecution::SetInput23,
67 &Segmentation_ModelExecution::SetInput24,
68 &Segmentation_ModelExecution::SetInput25,
69 &Segmentation_ModelExecution::SetInput26,
70 &Segmentation_ModelExecution::SetInput27,
71 &Segmentation_ModelExecution::SetInput28,
Min Qin81e6bda2022-12-07 06:44:2772 &Segmentation_ModelExecution::SetInput29,
73 &Segmentation_ModelExecution::SetInput30,
74 &Segmentation_ModelExecution::SetInput31,
75 &Segmentation_ModelExecution::SetInput32,
76 &Segmentation_ModelExecution::SetInput33,
77 &Segmentation_ModelExecution::SetInput34,
78 &Segmentation_ModelExecution::SetInput35,
79 &Segmentation_ModelExecution::SetInput36,
80 &Segmentation_ModelExecution::SetInput37,
81 &Segmentation_ModelExecution::SetInput38,
82 &Segmentation_ModelExecution::SetInput39,
83 &Segmentation_ModelExecution::SetInput40,
84 &Segmentation_ModelExecution::SetInput41,
85 &Segmentation_ModelExecution::SetInput42,
86 &Segmentation_ModelExecution::SetInput43,
87 &Segmentation_ModelExecution::SetInput44,
88 &Segmentation_ModelExecution::SetInput45,
89 &Segmentation_ModelExecution::SetInput46,
90 &Segmentation_ModelExecution::SetInput47,
91 &Segmentation_ModelExecution::SetInput48,
92 &Segmentation_ModelExecution::SetInput49};
Min Qin75bee1b2022-02-05 03:57:3493
Min Qin1eeb6432023-02-23 23:41:5994const UkmMemberFn kSegmentationUkmPredictionResultMethods[] = {
95 &Segmentation_ModelExecution::SetPredictionResult1,
96 &Segmentation_ModelExecution::SetPredictionResult2,
97 &Segmentation_ModelExecution::SetPredictionResult3,
98 &Segmentation_ModelExecution::SetPredictionResult4,
99 &Segmentation_ModelExecution::SetPredictionResult5,
100 &Segmentation_ModelExecution::SetPredictionResult6,
101 &Segmentation_ModelExecution::SetPredictionResult7,
102 &Segmentation_ModelExecution::SetPredictionResult8,
103 &Segmentation_ModelExecution::SetPredictionResult9,
104 &Segmentation_ModelExecution::SetPredictionResult10};
105
Xing Liu282dd0d2022-02-24 00:20:39106const UkmMemberFn kSegmentationUkmOutputMethods[] = {
107 &Segmentation_ModelExecution::SetActualResult,
108 &Segmentation_ModelExecution::SetActualResult2,
109 &Segmentation_ModelExecution::SetActualResult3,
110 &Segmentation_ModelExecution::SetActualResult4,
111 &Segmentation_ModelExecution::SetActualResult5,
112 &Segmentation_ModelExecution::SetActualResult6};
Min Qin7a9f38902023-05-15 16:27:46113
114// 1 out of 100 model execution will be reported.
115const int kDefaultModelExecutionSamplingRate = 100;
116
117int GetModelExecutionSamplingRate() {
118 return base::GetFieldTrialParamByFeatureAsInt(
119 segmentation_platform::features::
120 kSegmentationPlatformModelExecutionSampling,
121 segmentation_platform::kModelExecutionSamplingRateKey,
122 kDefaultModelExecutionSamplingRate);
123}
Min Qin75bee1b2022-02-05 03:57:34124
Min Qin1eeb6432023-02-23 23:41:59125// Helper method to add model prediction results to UKM log.
126void AddPredictionResultToUkmModelExecution(
127 ukm::builders::Segmentation_ModelExecution* model_execution,
128 const std::vector<float>& results) {
129 CHECK_LE(results.size(), ARRAY_SIZE(kSegmentationUkmPredictionResultMethods));
130 for (size_t i = 0; i < results.size(); ++i) {
131 CALL_MEMBER_FN(*model_execution, kSegmentationUkmPredictionResultMethods[i])
132 (SegmentationUkmHelper::FloatToInt64(results[i]));
133 }
134}
135
ssid7a2fe7012023-07-05 23:16:09136std::string GetDebugString(const ModelProvider::Request& input_tensor,
137 const ModelProvider::Response& outputs) {
138 std::stringstream out;
139 out << "Inputs: ";
140 int j = 0;
141 for (const auto& i : input_tensor) {
142 out << j++ << ":" << i << " ";
143 }
144 out << " Outputs: ";
145 j = 0;
146 for (const auto& i : outputs) {
147 out << j++ << ":" << i << " ";
148 }
149 return out.str();
150}
151
152} // namespace
Min Qin75bee1b2022-02-05 03:57:34153
154SegmentationUkmHelper::SegmentationUkmHelper() {
155 Initialize();
156}
157
158SegmentationUkmHelper::~SegmentationUkmHelper() = default;
159
Min Qin75bee1b2022-02-05 03:57:34160// static
161SegmentationUkmHelper* SegmentationUkmHelper::GetInstance() {
162 static base::NoDestructor<SegmentationUkmHelper> helper;
163 return helper.get();
164}
165
Min Qin67dc6b9c2023-02-01 21:00:20166void SegmentationUkmHelper::Initialize() {
Alison Galeb8be9522024-04-16 00:00:31167 // TODO(crbug.com/40887237): Migrate models for these segments to use
Min Qin67dc6b9c2023-02-01 21:00:20168 // `upload_tensors`.
169 allowed_segment_ids_.clear();
170 if (base::FeatureList::IsEnabled(segmentation_platform::features::
171 kSegmentationDefaultReportingSegments)) {
172 allowed_segment_ids_ = base::flat_set<SegmentId>{
173 SegmentId::OPTIMIZATION_TARGET_SEGMENTATION_NEW_TAB,
174 SegmentId::OPTIMIZATION_TARGET_SEGMENTATION_SHARE,
175 SegmentId::OPTIMIZATION_TARGET_SEGMENTATION_VOICE,
Ritika Guptae0537042023-12-13 15:39:19176 SegmentId::OPTIMIZATION_TARGET_SEGMENTATION_DUMMY};
Min Qin67dc6b9c2023-02-01 21:00:20177 }
Min Qin7a9f38902023-05-15 16:27:46178 sampling_rate_ = GetModelExecutionSamplingRate();
179 DCHECK_GE(sampling_rate_, 0);
Min Qin67dc6b9c2023-02-01 21:00:20180}
181
Min Qin75bee1b2022-02-05 03:57:34182ukm::SourceId SegmentationUkmHelper::RecordModelExecutionResult(
ssid8386fc72022-05-21 00:34:17183 SegmentId segment_id,
Min Qin75bee1b2022-02-05 03:57:34184 int64_t model_version,
Jan Turecek9deb2252022-11-23 23:25:04185 const ModelProvider::Request& input_tensor,
Min Qin1eeb6432023-02-23 23:41:59186 const std::vector<float>& results) {
Min Qin75bee1b2022-02-05 03:57:34187 ukm::SourceId source_id = ukm::NoURLSourceId();
Min Qin7a9f38902023-05-15 16:27:46188 // Do some sampling before sending out UKM.
189 if (sampling_rate_ == 0) {
190 return source_id;
191 }
192
193 if (base::RandInt(1, sampling_rate_) > 1) {
194 return source_id;
195 }
Min Qin75bee1b2022-02-05 03:57:34196 ukm::builders::Segmentation_ModelExecution execution_result(source_id);
197
Xing Liu282dd0d2022-02-24 00:20:39198 // Add inputs to ukm message.
199 if (!AddInputsToUkm(&execution_result, segment_id, model_version,
Min Qinf254aa92022-08-24 18:48:42200 input_tensor)) {
Xing Liu282dd0d2022-02-24 00:20:39201 return ukm::kInvalidSourceId;
Min Qinf254aa92022-08-24 18:48:42202 }
Min Qin75bee1b2022-02-05 03:57:34203
Min Qin1eeb6432023-02-23 23:41:59204 AddPredictionResultToUkmModelExecution(&execution_result, results);
205 execution_result.Record(ukm::UkmRecorder::Get());
Min Qin75bee1b2022-02-05 03:57:34206 return source_id;
207}
208
Xing Liu282dd0d2022-02-24 00:20:39209ukm::SourceId SegmentationUkmHelper::RecordTrainingData(
ssid8386fc72022-05-21 00:34:17210 SegmentId segment_id,
Xing Liu282dd0d2022-02-24 00:20:39211 int64_t model_version,
ssidad353932024-08-27 22:42:25212 ukm::SourceId ukm_source_id,
Jan Turecek9deb2252022-11-23 23:25:04213 const ModelProvider::Request& input_tensor,
214 const ModelProvider::Response& outputs,
Min Qind52d150c2022-04-22 05:32:13215 const std::vector<int>& output_indexes,
Arthur Sonzognic571efb2024-01-26 20:26:18216 std::optional<proto::PredictionResult> prediction_result,
217 std::optional<SelectedSegment> selected_segment) {
ssidad353932024-08-27 22:42:25218 ukm::SourceId source_id = ukm_source_id != ukm::kInvalidSourceId
219 ? ukm_source_id
220 : ukm::NoURLSourceId();
Xing Liu282dd0d2022-02-24 00:20:39221 ukm::builders::Segmentation_ModelExecution execution_result(source_id);
222 if (!AddInputsToUkm(&execution_result, segment_id, model_version,
223 input_tensor)) {
224 return ukm::kInvalidSourceId;
225 }
226
227 if (!AddOutputsToUkm(&execution_result, outputs, output_indexes)) {
228 return ukm::kInvalidSourceId;
229 }
230
ritikagupeaf525c2022-11-11 00:53:24231 if (prediction_result.has_value() && prediction_result->result_size() > 0) {
Min Qin1eeb6432023-02-23 23:41:59232 std::vector<float> results(prediction_result->result().begin(),
233 prediction_result->result().end());
234 AddPredictionResultToUkmModelExecution(&execution_result, results);
Salvador Guerrero5783fd62023-05-13 00:28:32235 base::Time prediction_time = base::Time::FromDeltaSinceWindowsEpoch(
236 base::Microseconds(prediction_result->timestamp_us()));
237 execution_result.SetOutputDelaySec(
238 (base::Time::Now() - prediction_time).InSeconds());
Min Qind52d150c2022-04-22 05:32:13239 }
Min Qin642ab222022-05-19 21:54:53240 if (selected_segment.has_value()) {
241 execution_result.SetSelectionResult(selected_segment->segment_id);
242 execution_result.SetOutputDelaySec(
243 (base::Time::Now() - selected_segment->selection_time).InSeconds());
244 }
245
ssid7a2fe7012023-07-05 23:16:09246 VLOG(1) << "Recording training data " << proto::SegmentId_Name(segment_id)
247 << " " << GetDebugString(input_tensor, outputs);
248
Xing Liu282dd0d2022-02-24 00:20:39249 execution_result.Record(ukm::UkmRecorder::Get());
250 return source_id;
251}
252
253bool SegmentationUkmHelper::AddInputsToUkm(
254 ukm::builders::Segmentation_ModelExecution* ukm_builder,
ssid8386fc72022-05-21 00:34:17255 SegmentId segment_id,
Xing Liu282dd0d2022-02-24 00:20:39256 int64_t model_version,
Jan Turecek9deb2252022-11-23 23:25:04257 const ModelProvider::Request& input_tensor) {
Xing Liu282dd0d2022-02-24 00:20:39258 if (input_tensor.size() > ARRAY_SIZE(kSegmentationUkmInputMethods)) {
259 // Don't record UKM if there are too many tensors.
260 stats::RecordTooManyInputTensors(input_tensor.size());
261 return false;
262 }
263
264 ukm_builder->SetOptimizationTarget(segment_id).SetModelVersion(model_version);
265 for (size_t i = 0; i < input_tensor.size(); ++i) {
266 CALL_MEMBER_FN(*ukm_builder, kSegmentationUkmInputMethods[i])
267 (FloatToInt64(input_tensor[i]));
268 }
269 return true;
270}
271
272bool SegmentationUkmHelper::AddOutputsToUkm(
273 ukm::builders::Segmentation_ModelExecution* ukm_builder,
Jan Turecek9deb2252022-11-23 23:25:04274 const ModelProvider::Response& outputs,
Xing Liu282dd0d2022-02-24 00:20:39275 const std::vector<int>& output_indexes) {
276 DCHECK(!outputs.empty());
277 if (outputs.size() != output_indexes.size())
278 return false;
279
280 const int output_methods_size = ARRAY_SIZE(kSegmentationUkmOutputMethods);
281 if (outputs.size() > output_methods_size)
282 return false;
283
284 for (size_t i = 0; i < outputs.size(); ++i) {
285 if (output_indexes[i] >= output_methods_size)
286 return false;
287 CALL_MEMBER_FN(*ukm_builder,
288 kSegmentationUkmOutputMethods[output_indexes[i]])
289 (FloatToInt64(outputs[i]));
290 }
291
292 return true;
293}
294
ssid6036f472023-07-11 00:40:45295bool SegmentationUkmHelper::IsUploadRequested(
Min Qinf254aa92022-08-24 18:48:42296 const proto::SegmentInfo& segment_info) const {
Min Qinf254aa92022-08-24 18:48:42297 return segment_info.model_metadata().upload_tensors() ||
Min Qin67dc6b9c2023-02-01 21:00:20298 allowed_segment_ids_.contains(segment_info.segment_id());
Min Qinf254aa92022-08-24 18:48:42299}
300
Min Qin75bee1b2022-02-05 03:57:34301// static
302int64_t SegmentationUkmHelper::FloatToInt64(float f) {
303 // Encode the float number in IEEE754 double precision.
Peter Kastingcc88ac052022-05-03 09:58:01304 return base::bit_cast<int64_t>(static_cast<double>(f));
Min Qin75bee1b2022-02-05 03:57:34305}
306
Min Qincae984cb2022-05-20 03:08:34307// static
308bool SegmentationUkmHelper::AllowedToUploadData(
309 base::TimeDelta signal_storage_length,
310 base::Clock* clock) {
Min Qin0b78c782022-05-21 01:42:13311 base::Time most_recent_allowed = LocalStateHelper::GetInstance().GetPrefTime(
312 kSegmentationUkmMostRecentAllowedTimeKey);
313 // If the local state is never set, return false.
314 if (most_recent_allowed.is_null() ||
315 most_recent_allowed == base::Time::Max()) {
Salvador Guerrero6ca4da82023-07-11 16:29:01316 VLOG(1) << "UKM consent not granted";
Min Qin0b78c782022-05-21 01:42:13317 return false;
318 }
Salvador Guerrero6ca4da82023-07-11 16:29:01319
320 if (most_recent_allowed + signal_storage_length < clock->Now()) {
321 return true;
322 } else {
323 VLOG(1) << "UKM consent granted on: " << most_recent_allowed
324 << ". Waiting for the model's storage period ("
325 << most_recent_allowed + signal_storage_length
326 << ") to avoid uploading data collected pre-consent";
327 return false;
328 }
Min Qincae984cb2022-05-20 03:08:34329}
330
Min Qin75bee1b2022-02-05 03:57:34331} // namespace segmentation_platform