Add Metadata to Metric Logs to analyze the log sources.

For UKM logs, this propagates information about the type of events
they contain: only AppKM events, only regular UKM events, or a
combination of both. This will allow us to customize the network
traffic annotations used when uploading these logs, which will be
done in a follow-up CL.

BUG=chromium:1497020
BUG=b:302590163

Change-Id: I16c2ae325809e6d1b9b09dc3483600f174c2a85f
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4933162
Commit-Queue: Nik K <[email protected]>
Reviewed-by: Nicolas Ouellet-Payeur <[email protected]>
Reviewed-by: Alexei Svitkine <[email protected]>
Auto-Submit: Nik K <[email protected]>
Cr-Commit-Position: refs/heads/main@{#1222477}
diff --git a/components/metrics/log_store.h b/components/metrics/log_store.h
index a71a511..90b7be9 100644
--- a/components/metrics/log_store.h
+++ b/components/metrics/log_store.h
@@ -8,6 +8,7 @@
 #include <string>
 
 #include "base/strings/string_piece.h"
+#include "components/metrics/metrics_log.h"
 #include "third_party/abseil-cpp/absl/types/optional.h"
 
 namespace metrics {
@@ -44,6 +45,9 @@
   // Will trigger a DCHECK if there is no staged log.
   virtual absl::optional<uint64_t> staged_log_user_id() const = 0;
 
+  // LogMetadata associated with the staged log.
+  virtual const LogMetadata staged_log_metadata() const = 0;
+
   // Populates staged_log() with the next stored log to send.
   // The order in which logs are staged is up to the implementor.
   // The staged_log must remain the same even if additional logs are added.
diff --git a/components/metrics/metrics_log.cc b/components/metrics/metrics_log.cc
index 20a1151..2fe0443 100644
--- a/components/metrics/metrics_log.cc
+++ b/components/metrics/metrics_log.cc
@@ -66,8 +66,11 @@
     : samples_count(absl::nullopt), user_id(absl::nullopt) {}
 LogMetadata::LogMetadata(
     const absl::optional<base::HistogramBase::Count> samples_count,
-    const absl::optional<uint64_t> user_id)
-    : samples_count(samples_count), user_id(user_id) {}
+    const absl::optional<uint64_t> user_id,
+    const absl::optional<metrics::UkmLogSourceType> log_source_type)
+    : samples_count(samples_count),
+      user_id(user_id),
+      log_source_type(log_source_type) {}
 LogMetadata::LogMetadata(const LogMetadata& other) = default;
 LogMetadata::~LogMetadata() = default;
 
diff --git a/components/metrics/metrics_log.h b/components/metrics/metrics_log.h
index c12904b..6cc941ce 100644
--- a/components/metrics/metrics_log.h
+++ b/components/metrics/metrics_log.h
@@ -37,11 +37,20 @@
 
 namespace metrics {
 
+// Type of data carried by a UKM log. Persisted as an int in Local State by
+// unsent_log_store.cc, so existing entries must not be renumbered.
+enum UkmLogSourceType {
+  UKM_ONLY = 0,            // Log contains only UKM data.
+  APPKM_ONLY = 1,          // Log contains only AppKM data.
+  BOTH_UKM_AND_APPKM = 2,  // Log contains both AppKM and UKM data.
+};
+
 // Holds optional metadata associated with a log to be stored.
 struct LogMetadata {
   LogMetadata();
   LogMetadata(absl::optional<base::HistogramBase::Count> samples_count,
-              absl::optional<uint64_t> user_id);
+              absl::optional<uint64_t> user_id,
+              absl::optional<UkmLogSourceType> log_source_type);
   LogMetadata(const LogMetadata& other);
   ~LogMetadata();
 
@@ -54,6 +63,9 @@
 
   // User id associated with the log.
   absl::optional<uint64_t> user_id;
+
+  // For UKM logs, indicates the type of data.
+  absl::optional<UkmLogSourceType> log_source_type;
 };
 
 class MetricsServiceClient;
diff --git a/components/metrics/metrics_log_store.cc b/components/metrics/metrics_log_store.cc
index d88e247..f2b2a7e 100644
--- a/components/metrics/metrics_log_store.cc
+++ b/components/metrics/metrics_log_store.cc
@@ -157,6 +157,10 @@
   return get_staged_log_queue()->staged_log_user_id();
 }
 
+const LogMetadata MetricsLogStore::staged_log_metadata() const {
+  return get_staged_log_queue()->staged_log_metadata();
+}
+
 bool MetricsLogStore::has_alternate_ongoing_log_store() const {
   return alternate_ongoing_log_queue_ != nullptr;
 }
diff --git a/components/metrics/metrics_log_store.h b/components/metrics/metrics_log_store.h
index a3a12f9..ee16e42 100644
--- a/components/metrics/metrics_log_store.h
+++ b/components/metrics/metrics_log_store.h
@@ -125,6 +125,7 @@
   const std::string& staged_log_hash() const override;
   const std::string& staged_log_signature() const override;
   absl::optional<uint64_t> staged_log_user_id() const override;
+  const LogMetadata staged_log_metadata() const override;
   void StageNextLog() override;
   void DiscardStagedLog(base::StringPiece reason = "") override;
   void MarkStagedLogAsSent() override;
diff --git a/components/metrics/metrics_log_uploader.h b/components/metrics/metrics_log_uploader.h
index 9852c08b..06d86ac 100644
--- a/components/metrics/metrics_log_uploader.h
+++ b/components/metrics/metrics_log_uploader.h
@@ -9,6 +9,7 @@
 
 #include "base/functional/callback.h"
 #include "base/strings/string_piece.h"
+#include "components/metrics/metrics_log.h"
 
 namespace metrics {
 
@@ -53,6 +54,7 @@
   // doesn't do this validation, then |log_hash| and |log_signature| can be
   // ignored.
   virtual void UploadLog(const std::string& compressed_log_data,
+                         const LogMetadata& log_metadata,
                          const std::string& log_hash,
                          const std::string& log_signature,
                          const ReportingInfo& reporting_info) = 0;
diff --git a/components/metrics/net/net_metrics_log_uploader.cc b/components/metrics/net/net_metrics_log_uploader.cc
index 277d7b8..22fa81ca 100644
--- a/components/metrics/net/net_metrics_log_uploader.cc
+++ b/components/metrics/net/net_metrics_log_uploader.cc
@@ -15,6 +15,7 @@
 #include "base/strings/string_number_conversions.h"
 #include "components/encrypted_messages/encrypted_message.pb.h"
 #include "components/encrypted_messages/message_encrypter.h"
+#include "components/metrics/metrics_log.h"
 #include "components/metrics/metrics_log_uploader.h"
 #include "net/base/load_flags.h"
 #include "net/base/url_util.h"
@@ -45,7 +46,8 @@
     "No server upload URLs specified. Will not attempt to retransmit.";
 
 net::NetworkTrafficAnnotationTag GetNetworkTrafficAnnotation(
-    const metrics::MetricsLogUploader::MetricServiceType& service_type) {
+    const metrics::MetricsLogUploader::MetricServiceType& service_type,
+    const metrics::LogMetadata& log_metadata) {
   // The code in this function should remain so that we won't need a default
   // case that does not have meaningful annotation.
   // Structured Metrics is an UMA consented metric service.
@@ -85,6 +87,9 @@
         })");
   }
   DCHECK_EQ(service_type, metrics::MetricsLogUploader::UKM);
+
+  // TODO(b/308003806) Create an annotation for AppKM.
+
   return net::DefineNetworkTrafficAnnotation("metrics_report_ukm", R"(
       semantics {
         sender: "Metrics UKM Log Uploader"
@@ -248,6 +253,7 @@
 NetMetricsLogUploader::~NetMetricsLogUploader() = default;
 
 void NetMetricsLogUploader::UploadLog(const std::string& compressed_log_data,
+                                      const LogMetadata& log_metadata,
                                       const std::string& log_hash,
                                       const std::string& log_signature,
                                       const ReportingInfo& reporting_info) {
@@ -258,16 +264,17 @@
       reporting_info.last_error_code() != 0 &&
       reporting_info.last_attempt_was_https() &&
       !insecure_server_url_.is_empty()) {
-    UploadLogToURL(compressed_log_data, log_hash, log_signature, reporting_info,
-                   insecure_server_url_);
+    UploadLogToURL(compressed_log_data, log_metadata, log_hash, log_signature,
+                   reporting_info, insecure_server_url_);
     return;
   }
-  UploadLogToURL(compressed_log_data, log_hash, log_signature, reporting_info,
-                 server_url_);
+  UploadLogToURL(compressed_log_data, log_metadata, log_hash, log_signature,
+                 reporting_info, server_url_);
 }
 
 void NetMetricsLogUploader::UploadLogToURL(
     const std::string& compressed_log_data,
+    const LogMetadata& log_metadata,
     const std::string& log_hash,
     const std::string& log_signature,
     const ReportingInfo& reporting_info,
@@ -332,7 +339,7 @@
   }
 
   net::NetworkTrafficAnnotationTag traffic_annotation =
-      GetNetworkTrafficAnnotation(service_type_);
+      GetNetworkTrafficAnnotation(service_type_, log_metadata);
   url_loader_ = network::SimpleURLLoader::Create(std::move(resource_request),
                                                  traffic_annotation);
 
diff --git a/components/metrics/net/net_metrics_log_uploader.h b/components/metrics/net/net_metrics_log_uploader.h
index 4201de3f..bc9782d 100644
--- a/components/metrics/net/net_metrics_log_uploader.h
+++ b/components/metrics/net/net_metrics_log_uploader.h
@@ -9,6 +9,7 @@
 #include <string>
 
 #include "base/strings/string_piece.h"
+#include "components/metrics/metrics_log.h"
 #include "components/metrics/metrics_log_uploader.h"
 #include "third_party/metrics_proto/reporting_info.pb.h"
 #include "url/gurl.h"
@@ -53,6 +54,7 @@
   // MetricsLogUploader:
   // Uploads a log to the server_url specified in the constructor.
   void UploadLog(const std::string& compressed_log_data,
+                 const LogMetadata& log_metadata,
                  const std::string& log_hash,
                  const std::string& log_signature,
                  const ReportingInfo& reporting_info) override;
@@ -60,6 +62,7 @@
  private:
   // Uploads a log to a URL passed as a parameter.
   void UploadLogToURL(const std::string& compressed_log_data,
+                      const LogMetadata& log_metadata,
                       const std::string& log_hash,
                       const std::string& log_signature,
                       const ReportingInfo& reporting_info,
diff --git a/components/metrics/net/net_metrics_log_uploader_unittest.cc b/components/metrics/net/net_metrics_log_uploader_unittest.cc
index 640e4eb..8fb136db 100644
--- a/components/metrics/net/net_metrics_log_uploader_unittest.cc
+++ b/components/metrics/net/net_metrics_log_uploader_unittest.cc
@@ -50,8 +50,9 @@
         base::BindRepeating(
             &NetMetricsLogUploaderTest::OnUploadCompleteReuseUploader,
             base::Unretained(this)));
-    uploader_->UploadLog("initial_dummy_data", "initial_dummy_hash",
-                         "initial_dummy_signature", reporting_info);
+    uploader_->UploadLog("initial_dummy_data", LogMetadata(),
+                         "initial_dummy_hash", "initial_dummy_signature",
+                         reporting_info);
   }
 
   void CreateUploaderAndUploadToSecureURL(const std::string& url) {
@@ -61,8 +62,8 @@
         MetricsLogUploader::UMA,
         base::BindRepeating(&NetMetricsLogUploaderTest::DummyOnUploadComplete,
                             base::Unretained(this)));
-    uploader_->UploadLog("dummy_data", "dummy_hash", "dummy_signature",
-                         dummy_reporting_info);
+    uploader_->UploadLog("dummy_data", LogMetadata(), "dummy_hash",
+                         "dummy_signature", dummy_reporting_info);
   }
 
   void CreateUploaderAndUploadToInsecureURL() {
@@ -76,8 +77,8 @@
     // Compress the data since the encryption code expects a compressed log,
     // and tries to decompress it before encrypting it.
     compression::GzipCompress("dummy_data", &compressed_message);
-    uploader_->UploadLog(compressed_message, "dummy_hash", "dummy_signature",
-                         dummy_reporting_info);
+    uploader_->UploadLog(compressed_message, LogMetadata(), "dummy_hash",
+                         "dummy_signature", dummy_reporting_info);
   }
 
   void DummyOnUploadComplete(int response_code,
@@ -97,8 +98,8 @@
     if (on_upload_complete_count_ == 1) {
       ReportingInfo reporting_info;
       reporting_info.set_attempt_count(20);
-      uploader_->UploadLog("dummy_data", "dummy_hash", "dummy_signature",
-                           reporting_info);
+      uploader_->UploadLog("dummy_data", LogMetadata(), "dummy_hash",
+                           "dummy_signature", reporting_info);
     }
     log_was_force_discarded_ = force_discard;
   }
diff --git a/components/metrics/reporting_service.cc b/components/metrics/reporting_service.cc
index 556b128..6c79a09 100644
--- a/components/metrics/reporting_service.cc
+++ b/components/metrics/reporting_service.cc
@@ -188,7 +188,8 @@
         MetricsLogsEventManager::LogEvent::kLogUploading,
         log_store()->staged_log_hash());
   }
-  log_uploader_->UploadLog(log_store()->staged_log(), hash, signature,
+  log_uploader_->UploadLog(log_store()->staged_log(),
+                           log_store()->staged_log_metadata(), hash, signature,
                            reporting_info_);
 }
 
diff --git a/components/metrics/reporting_service_unittest.cc b/components/metrics/reporting_service_unittest.cc
index 1b70366b..c9e65e3 100644
--- a/components/metrics/reporting_service_unittest.cc
+++ b/components/metrics/reporting_service_unittest.cc
@@ -16,6 +16,7 @@
 #include "base/task/single_thread_task_runner.h"
 #include "base/test/test_simple_task_runner.h"
 #include "components/metrics/log_store.h"
+#include "components/metrics/metrics_log.h"
 #include "components/metrics/test/test_metrics_service_client.h"
 #include "components/prefs/testing_pref_service.h"
 #include "testing/gtest/include/gtest/gtest.h"
@@ -30,11 +31,14 @@
   explicit TestLog(const std::string& log) : log(log), user_id(absl::nullopt) {}
   TestLog(const std::string& log, uint64_t user_id)
       : log(log), user_id(user_id) {}
+  TestLog(const std::string& log, uint64_t user_id, LogMetadata log_metadata)
+      : log(log), user_id(user_id), log_metadata(log_metadata) {}
   TestLog(const TestLog& other) = default;
   ~TestLog() = default;
 
   const std::string log;
   const absl::optional<uint64_t> user_id;
+  const LogMetadata log_metadata;
 };
 
 const char kTestUploadUrl[] = "test_url";
@@ -57,6 +61,9 @@
   absl::optional<uint64_t> staged_log_user_id() const override {
     return logs_.front().user_id;
   }
+  const LogMetadata staged_log_metadata() const override {
+    return logs_.front().log_metadata;
+  }
   const std::string& staged_log_signature() const override {
     return base::EmptyString();
   }
diff --git a/components/metrics/test/test_metrics_log_uploader.cc b/components/metrics/test/test_metrics_log_uploader.cc
index 01f3e91..b993371 100644
--- a/components/metrics/test/test_metrics_log_uploader.cc
+++ b/components/metrics/test/test_metrics_log_uploader.cc
@@ -23,6 +23,7 @@
 }
 
 void TestMetricsLogUploader::UploadLog(const std::string& compressed_log_data,
+                                       const LogMetadata& log_metadata,
                                        const std::string& log_hash,
                                        const std::string& log_signature,
                                        const ReportingInfo& reporting_info) {
diff --git a/components/metrics/test/test_metrics_log_uploader.h b/components/metrics/test/test_metrics_log_uploader.h
index 4a30185..2168069 100644
--- a/components/metrics/test/test_metrics_log_uploader.h
+++ b/components/metrics/test/test_metrics_log_uploader.h
@@ -6,6 +6,7 @@
 #define COMPONENTS_METRICS_TEST_TEST_METRICS_LOG_UPLOADER_H_
 
 #include "base/memory/weak_ptr.h"
+#include "components/metrics/metrics_log.h"
 #include "components/metrics/metrics_log_uploader.h"
 #include "third_party/metrics_proto/reporting_info.pb.h"
 
@@ -34,6 +35,7 @@
  private:
   // MetricsLogUploader:
   void UploadLog(const std::string& compressed_log_data,
+                 const LogMetadata& log_metadata,
                  const std::string& log_hash,
                  const std::string& log_signature,
                  const ReportingInfo& reporting_info) override;
diff --git a/components/metrics/unsent_log_store.cc b/components/metrics/unsent_log_store.cc
index a024a8e..9d4eddd 100644
--- a/components/metrics/unsent_log_store.cc
+++ b/components/metrics/unsent_log_store.cc
@@ -33,6 +33,7 @@
 const char kLogSentCountKey[] = "sent_samples_count";
 const char kLogPersistedSizeInKbKey[] = "unsent_persisted_size_in_kb";
 const char kLogUserIdKey[] = "user_id";
+const char kLogSourceType[] = "type";
 
 std::string EncodeToBase64(const std::string& to_convert) {
   DCHECK(to_convert.data());
@@ -73,7 +74,11 @@
     dict_value.Set(kLogSignatureKey, EncodeToBase64(log->signature));
     dict_value.Set(kLogDataKey, EncodeToBase64(log->compressed_log_data));
     dict_value.Set(kLogTimestampKey, log->timestamp);
-
+    if (log->log_metadata.log_source_type.has_value()) {
+      dict_value.Set(
+          kLogSourceType,
+          static_cast<int>(log->log_metadata.log_source_type.value()));
+    }
     auto user_id = log->log_metadata.user_id;
     if (user_id.has_value()) {
       dict_value.Set(kLogUserIdKey,
@@ -228,6 +233,11 @@
   return list_[staged_log_index_]->log_metadata.user_id;
 }
 
+const LogMetadata UnsentLogStore::staged_log_metadata() const {
+  DCHECK(has_staged_log());
+  return list_[staged_log_index_]->log_metadata;  // Copy; entry stays intact.
+}
+
 // static
 bool UnsentLogStore::ComputeHMACForLog(const std::string& log_data,
                                        const std::string& signing_key,
@@ -465,6 +475,12 @@
     info->signature = DecodeFromBase64(info->signature);
     // timestamp doesn't need to be decoded.
 
+    absl::optional<int> log_source_type = dict->FindInt(kLogSourceType);
+    if (log_source_type.has_value()) {
+      info->log_metadata.log_source_type =
+          static_cast<UkmLogSourceType>(log_source_type.value());
+    }
+
     // Extract user id of the log if it exists.
     const std::string* user_id_str = dict->FindString(kLogUserIdKey);
     if (user_id_str) {
diff --git a/components/metrics/unsent_log_store.h b/components/metrics/unsent_log_store.h
index 6f223b51..3115111 100644
--- a/components/metrics/unsent_log_store.h
+++ b/components/metrics/unsent_log_store.h
@@ -144,6 +144,7 @@
   const std::string& staged_log_hash() const override;
   const std::string& staged_log_signature() const override;
   absl::optional<uint64_t> staged_log_user_id() const override;
+  const LogMetadata staged_log_metadata() const override;
   void StageNextLog() override;
   void DiscardStagedLog(base::StringPiece reason = "") override;
   void MarkStagedLogAsSent() override;
diff --git a/components/metrics/unsent_log_store_unittest.cc b/components/metrics/unsent_log_store_unittest.cc
index 9634b52..dedc4d7d 100644
--- a/components/metrics/unsent_log_store_unittest.cc
+++ b/components/metrics/unsent_log_store_unittest.cc
@@ -558,7 +558,7 @@
   const uint64_t user_id = 12345L;
 
   TestUnsentLogStore unsent_log_store(&prefs_, kLogByteLimit);
-  LogMetadata log_metadata(absl::nullopt, user_id);
+  LogMetadata log_metadata(absl::nullopt, user_id, absl::nullopt);
   unsent_log_store.StoreLog(foo_text, log_metadata,
                             MetricsLogsEventManager::CreateReason::kUnknown);
   unsent_log_store.StageNextLog();
@@ -583,7 +583,7 @@
   const uint64_t large_user_id = std::numeric_limits<uint64_t>::max();
 
   TestUnsentLogStore unsent_log_store(&prefs_, kLogByteLimit);
-  LogMetadata log_metadata(absl::nullopt, large_user_id);
+  LogMetadata log_metadata(absl::nullopt, large_user_id, absl::nullopt);
   unsent_log_store.StoreLog(foo_text, log_metadata,
                             MetricsLogsEventManager::CreateReason::kUnknown);
   unsent_log_store.StageNextLog();
@@ -603,6 +603,34 @@
   EXPECT_EQ(large_user_id, read_unsent_log_store.staged_log_user_id().value());
 }
 
+TEST_F(UnsentLogStoreTest, StoreLogWithOnlyAppKMLogSource) {
+  const char foo_text[] = "foo";
+  const UkmLogSourceType log_source_type = UkmLogSourceType::APPKM_ONLY;
+
+  TestUnsentLogStore unsent_log_store(&prefs_, kLogByteLimit);
+  LogMetadata log_metadata(absl::nullopt, absl::nullopt, log_source_type);
+  unsent_log_store.StoreLog(foo_text, log_metadata,
+                            MetricsLogsEventManager::CreateReason::kUnknown);
+  unsent_log_store.StageNextLog();
+
+  EXPECT_EQ(Compress(foo_text), unsent_log_store.staged_log());
+  EXPECT_EQ(log_source_type,
+            unsent_log_store.staged_log_metadata().log_source_type.value());
+
+  unsent_log_store.TrimAndPersistUnsentLogs(/*overwrite_in_memory_store=*/true);
+
+  // Read the persisted logs back through a fresh log store.
+  TestUnsentLogStore read_unsent_log_store(&prefs_, kLogByteLimit);
+  read_unsent_log_store.LoadPersistedUnsentLogs();
+  EXPECT_EQ(1U, read_unsent_log_store.size());
+
+  // Ensure that the log source type survived the persist/load round trip.
+  read_unsent_log_store.StageNextLog();
+  EXPECT_EQ(
+      log_source_type,
+      read_unsent_log_store.staged_log_metadata().log_source_type.value());
+}
+
 TEST_F(UnsentLogStoreTest, UnsentLogMetadataMetrics) {
   std::unique_ptr<TestUnsentLogStoreMetrics> metrics =
       std::make_unique<TestUnsentLogStoreMetrics>();
@@ -628,7 +656,7 @@
   const char kNoSampleLog[] = "no sample log";
 
   LogMetadata log_metadata_with_oversize_sample(kOversizeLogSampleCount,
-                                                absl::nullopt);
+                                                absl::nullopt, absl::nullopt);
   unsent_log_store.StoreLog(oversize_log, log_metadata_with_oversize_sample,
                             MetricsLogsEventManager::CreateReason::kUnknown);
 
@@ -636,12 +664,14 @@
   unsent_log_store.StoreLog(kNoSampleLog, log_metadata_with_no_sample,
                             MetricsLogsEventManager::CreateReason::kUnknown);
 
-  LogMetadata log_metadata_foo_sample(kFooSampleCount, absl::nullopt);
+  LogMetadata log_metadata_foo_sample(kFooSampleCount, absl::nullopt,
+                                      absl::nullopt);
   unsent_log_store.StoreLog(kFooText, log_metadata_foo_sample,
                             MetricsLogsEventManager::CreateReason::kUnknown);
 
   // The foobar_log will be staged first.
-  LogMetadata log_metadata_foo_bar_sample(kFooBarSampleCount, absl::nullopt);
+  LogMetadata log_metadata_foo_bar_sample(kFooBarSampleCount, absl::nullopt,
+                                          absl::nullopt);
   unsent_log_store.StoreLog(foobar_log, log_metadata_foo_bar_sample,
                             MetricsLogsEventManager::CreateReason::kUnknown);