device/fido: handle user-info truncation.

CTAP2 authenticators are permitted to truncate |name| and |displayName|
fields at 64 bytes, and CTAP2 Yubikeys actually do so. However, this
truncation is done without any reference to the UTF-8 structure thus can
truncate in the middle of a multi-byte code-point. This makes future
responses from the authenticator potentially invalid CBOR because the
strings within are invalid UTF-8.

This change adds a facility to components/cbor to allow users of the
parser to indicate that invalid UTF-8 (except in map keys) should be
returned as Values of type |INVALID_UTF8|.

This change also uses this facility when processing CTAP2 GetAssertion
responses to allow precisely the |name| and |displayName| fields to be
truncated at (or after) 64 bytes. Any partial, multi-byte sequence at
the end of the invalid UTF-8 is dropped.

Bug: 941120
Change-Id: I1742128ee1e689fe400083404f17efc8afd55a97
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1573091
Commit-Queue: Adam Langley <[email protected]>
Reviewed-by: Chris Palmer <[email protected]>
Reviewed-by: Balazs Engedy <[email protected]>
Auto-Submit: Adam Langley <[email protected]>
Cr-Commit-Position: refs/heads/master@{#652977}
diff --git a/components/cbor/diagnostic_writer.cc b/components/cbor/diagnostic_writer.cc
index 870f6ba..04983e1 100644
--- a/components/cbor/diagnostic_writer.cc
+++ b/components/cbor/diagnostic_writer.cc
@@ -18,6 +18,23 @@
 
 namespace cbor {
 
+static bool AppendHex(const std::vector<uint8_t> bytes,
+                      char type_char,
+                      size_t rough_max_output_bytes,
+                      std::string* s) {
+  s->push_back(type_char);
+  s->push_back('\'');
+
+  if (ClampAdd(s->size(), ClampMul(2u, bytes.size())) >
+      rough_max_output_bytes) {
+    return false;
+  }
+  s->append(base::HexEncode(bytes.data(), bytes.size()));
+
+  s->push_back('\'');
+  return true;
+}
+
 static bool Serialize(const Value& node,
                       size_t rough_max_output_bytes,
                       std::string* s) {
@@ -30,19 +47,17 @@
       s->append(base::NumberToString(node.GetNegative()));
       break;
 
-    case Value::Type::BYTE_STRING: {
-      s->append("h'");
-
-      const std::vector<uint8_t>& bytes = node.GetBytestring();
-      if (ClampAdd(s->size(), ClampMul(2u, bytes.size())) >
-          rough_max_output_bytes) {
+    case Value::Type::INVALID_UTF8:
+      if (!AppendHex(node.GetInvalidUTF8(), 's', rough_max_output_bytes, s)) {
         return false;
       }
-      s->append(base::HexEncode(bytes.data(), bytes.size()));
-
-      s->push_back('\'');
       break;
-    }
+
+    case Value::Type::BYTE_STRING:
+      if (!AppendHex(node.GetBytestring(), 'h', rough_max_output_bytes, s)) {
+        return false;
+      }
+      break;
 
     case Value::Type::STRING: {
       std::string quoted_and_escaped;
diff --git a/components/cbor/diagnostic_writer_unittest.cc b/components/cbor/diagnostic_writer_unittest.cc
index c804ce0..a45fbd0 100644
--- a/components/cbor/diagnostic_writer_unittest.cc
+++ b/components/cbor/diagnostic_writer_unittest.cc
@@ -3,6 +3,8 @@
 // found in the LICENSE file.
 
 #include "components/cbor/diagnostic_writer.h"
+
+#include "components/cbor/reader.h"
 #include "components/cbor/values.h"
 
 #include "testing/gtest/include/gtest/gtest.h"
@@ -58,4 +60,15 @@
       3u);
 }
 
+TEST(CBORDiagnosticWriterTest, InvalidUTF8) {
+  static const uint8_t kInvalidUTF8[] = {0x62, 0xe2, 0x80};
+  cbor::Reader::Config config;
+  config.allow_invalid_utf8 = true;
+  base::Optional<cbor::Value> maybe_value =
+      cbor::Reader::Read(kInvalidUTF8, config);
+
+  ASSERT_TRUE(maybe_value);
+  EXPECT_EQ("s'E280'", DiagnosticWriter::Write(*maybe_value));
+}
+
 }  // namespace cbor
diff --git a/components/cbor/reader.cc b/components/cbor/reader.cc
index dbab268..db6c03ef 100644
--- a/components/cbor/reader.cc
+++ b/components/cbor/reader.cc
@@ -61,6 +61,9 @@
 
 }  // namespace
 
+Reader::Config::Config() = default;
+Reader::Config::~Config() = default;
+
 Reader::Reader(base::span<const uint8_t> data)
     : rest_(data), error_code_(DecoderError::CBOR_NO_ERROR) {}
 Reader::~Reader() {}
@@ -69,18 +72,11 @@
 base::Optional<Value> Reader::Read(base::span<uint8_t const> data,
                                    DecoderError* error_code_out,
                                    int max_nesting_level) {
-  size_t num_bytes_consumed;
-  auto value =
-      Read(data, &num_bytes_consumed, error_code_out, max_nesting_level);
+  Config config;
+  config.error_code_out = error_code_out;
+  config.max_nesting_level = max_nesting_level;
 
-  if (value && num_bytes_consumed != data.size()) {
-    if (error_code_out) {
-      *error_code_out = DecoderError::EXTRANEOUS_DATA;
-    }
-    return base::nullopt;
-  }
-
-  return value;
+  return Read(data, config);
 }
 
 // static
@@ -88,24 +84,44 @@
                                    size_t* num_bytes_consumed,
                                    DecoderError* error_code_out,
                                    int max_nesting_level) {
+  DCHECK(num_bytes_consumed);
+
+  Config config;
+  config.num_bytes_consumed = num_bytes_consumed;
+  config.error_code_out = error_code_out;
+  config.max_nesting_level = max_nesting_level;
+
+  return Read(data, config);
+}
+
+// static
+base::Optional<Value> Reader::Read(base::span<uint8_t const> data,
+                                   const Config& config) {
   Reader reader(data);
   base::Optional<Value> value =
-      reader.DecodeCompleteDataItem(max_nesting_level);
+      reader.DecodeCompleteDataItem(config, config.max_nesting_level);
 
   auto error = reader.GetErrorCode();
   const bool success = value.has_value();
   DCHECK_EQ(success, error == DecoderError::CBOR_NO_ERROR);
 
-  if (error_code_out) {
-    *error_code_out = error;
+  if (config.num_bytes_consumed) {
+    *config.num_bytes_consumed =
+        success ? data.size() - reader.num_bytes_remaining() : 0;
+  } else if (success && reader.num_bytes_remaining() > 0) {
+    error = DecoderError::EXTRANEOUS_DATA;
+    value.reset();
   }
 
-  *num_bytes_consumed =
-      success ? data.size() - reader.num_bytes_remaining() : 0;
+  if (config.error_code_out) {
+    *config.error_code_out = error;
+  }
+
   return value;
 }
 
-base::Optional<Value> Reader::DecodeCompleteDataItem(int max_nesting_level) {
+base::Optional<Value> Reader::DecodeCompleteDataItem(const Config& config,
+                                                     int max_nesting_level) {
   if (max_nesting_level < 0 || max_nesting_level > kCBORMaxDepth) {
     error_code_ = DecoderError::TOO_MUCH_NESTING;
     return base::nullopt;
@@ -124,15 +140,16 @@
     case Value::Type::BYTE_STRING:
       return ReadByteStringContent(*header);
     case Value::Type::STRING:
-      return ReadStringContent(*header);
+      return ReadStringContent(*header, config);
     case Value::Type::ARRAY:
-      return ReadArrayContent(*header, max_nesting_level);
+      return ReadArrayContent(*header, config, max_nesting_level);
     case Value::Type::MAP:
-      return ReadMapContent(*header, max_nesting_level);
+      return ReadMapContent(*header, config, max_nesting_level);
     case Value::Type::SIMPLE_VALUE:
       return DecodeToSimpleValue(*header);
     case Value::Type::TAG:  // We explicitly don't support TAG.
     case Value::Type::NONE:
+    case Value::Type::INVALID_UTF8:
       break;
   }
 
@@ -238,7 +255,8 @@
 }
 
 base::Optional<Value> Reader::ReadStringContent(
-    const Reader::DataItemHeader& header) {
+    const Reader::DataItemHeader& header,
+    const Config& config) {
   uint64_t num_bytes = header.value;
   const base::Optional<base::span<const uint8_t>> bytes = ReadBytes(num_bytes);
   if (!bytes) {
@@ -246,10 +264,16 @@
   }
 
   std::string cbor_string(bytes->begin(), bytes->end());
+  if (base::IsStringUTF8(cbor_string)) {
+    return Value(std::move(cbor_string));
+  }
 
-  return HasValidUTF8Format(cbor_string)
-             ? base::make_optional<Value>(Value(std::move(cbor_string)))
-             : base::nullopt;
+  if (config.allow_invalid_utf8) {
+    return Value(*bytes, Value::Type::INVALID_UTF8);
+  }
+
+  error_code_ = DecoderError::INVALID_UTF8;
+  return base::nullopt;
 }
 
 base::Optional<Value> Reader::ReadByteStringContent(
@@ -266,13 +290,14 @@
 
 base::Optional<Value> Reader::ReadArrayContent(
     const Reader::DataItemHeader& header,
+    const Config& config,
     int max_nesting_level) {
   const uint64_t length = header.value;
 
   Value::ArrayValue cbor_array;
   for (uint64_t i = 0; i < length; ++i) {
     base::Optional<Value> cbor_element =
-        DecodeCompleteDataItem(max_nesting_level - 1);
+        DecodeCompleteDataItem(config, max_nesting_level - 1);
     if (!cbor_element.has_value()) {
       return base::nullopt;
     }
@@ -283,13 +308,16 @@
 
 base::Optional<Value> Reader::ReadMapContent(
     const Reader::DataItemHeader& header,
+    const Config& config,
     int max_nesting_level) {
   const uint64_t length = header.value;
 
   Value::MapValue cbor_map;
   for (uint64_t i = 0; i < length; ++i) {
-    base::Optional<Value> key = DecodeCompleteDataItem(max_nesting_level - 1);
-    base::Optional<Value> value = DecodeCompleteDataItem(max_nesting_level - 1);
+    base::Optional<Value> key =
+        DecodeCompleteDataItem(config, max_nesting_level - 1);
+    base::Optional<Value> value =
+        DecodeCompleteDataItem(config, max_nesting_level - 1);
     if (!key.has_value() || !value.has_value()) {
       return base::nullopt;
     }
@@ -300,6 +328,9 @@
       case Value::Type::STRING:
       case Value::Type::BYTE_STRING:
         break;
+      case Value::Type::INVALID_UTF8:
+        error_code_ = DecoderError::INVALID_UTF8;
+        return base::nullopt;
       default:
         error_code_ = DecoderError::INCORRECT_MAP_KEY_TYPE;
         return base::nullopt;
@@ -338,14 +369,6 @@
   return true;
 }
 
-bool Reader::HasValidUTF8Format(const std::string& string_data) {
-  if (!base::IsStringUTF8(string_data)) {
-    error_code_ = DecoderError::INVALID_UTF8;
-    return false;
-  }
-  return true;
-}
-
 bool Reader::IsKeyInOrder(const Value& new_key, Value::MapValue* map) {
   if (map->empty()) {
     return true;
diff --git a/components/cbor/reader.h b/components/cbor/reader.h
index 6cf58873b..25647f82 100644
--- a/components/cbor/reader.h
+++ b/components/cbor/reader.h
@@ -79,6 +79,35 @@
   // CBOR nested depth sufficient for most use cases.
   static const int kCBORMaxDepth = 16;
 
+  // Config contains configuration for a CBOR parsing operation.
+  struct CBOR_EXPORT Config {
+    Config();
+    ~Config();
+
+    // num_bytes_consumed, if not nullptr, is used to report the number of bytes
+    // of input consumed. This suppresses the |EXTRANEOUS_DATA| error case.
+    size_t* num_bytes_consumed = nullptr;
+    // error_code_out, if not nullptr, is used to report the specific error in
+    // the case that parsing fails.
+    DecoderError* error_code_out = nullptr;
+    // max_nesting_level controls the maximum depth of CBOR nesting that will be
+    // permitted. This exists to control stack consumption during parsing.
+    int max_nesting_level = kCBORMaxDepth;
+    // allow_invalid_utf8 causes strings that are not valid UTF-8 to be accepted
+    // and suppresses the |INVALID_UTF8| error, unless such strings are map
+    // keys. Invalid strings will result in Values of type |INVALID_UTF8| rather
+    // than |STRING|. Users of this feature should ensure that every invalid
+    // string is accounted for in the resulting structure.
+    //
+    // (Map keys are not allowed to be invalid because it was not necessary for
+    // the motivating case and because it adds complexity to handle the
+    // ordering correctly.)
+    bool allow_invalid_utf8 = false;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(Config);
+  };
+
   ~Reader();
 
   // Reads and parses |input_data| into a Value. Returns an empty Optional
@@ -97,8 +126,13 @@
                                     DecoderError* error_code_out = nullptr,
                                     int max_nesting_level = kCBORMaxDepth);
 
-  // Never fails with EXTRANEOUS_DATA, but informs the caller of how many bytes
-  // were consumed through |num_bytes_consumed|.
+  // A version of |Read|, above, that takes a |Config| structure to allow
+  // additional controls.
+  static base::Optional<Value> Read(base::span<const uint8_t> input_data,
+                                    const Config& config);
+
+  // A version of |Read| that takes some fields of |Config| as parameters to
+  // avoid having to construct a |Config| object explicitly.
   static base::Optional<Value> Read(base::span<const uint8_t> input_data,
                                     size_t* num_bytes_consumed,
                                     DecoderError* error_code_out = nullptr,
@@ -126,23 +160,23 @@
   };
 
   base::Optional<DataItemHeader> DecodeDataItemHeader();
-  base::Optional<Value> DecodeCompleteDataItem(int max_nesting_level);
+  base::Optional<Value> DecodeCompleteDataItem(const Config& config,
+                                               int max_nesting_level);
   base::Optional<Value> DecodeValueToNegative(uint64_t value);
   base::Optional<Value> DecodeValueToUnsigned(uint64_t value);
   base::Optional<Value> DecodeToSimpleValue(const DataItemHeader& header);
   base::Optional<uint64_t> ReadVariadicLengthInteger(uint8_t additional_info);
   base::Optional<Value> ReadByteStringContent(const DataItemHeader& header);
-  base::Optional<Value> ReadStringContent(const DataItemHeader& header);
+  base::Optional<Value> ReadStringContent(const DataItemHeader& header,
+                                          const Config& config);
   base::Optional<Value> ReadArrayContent(const DataItemHeader& header,
+                                         const Config& config,
                                          int max_nesting_level);
   base::Optional<Value> ReadMapContent(const DataItemHeader& header,
+                                       const Config& config,
                                        int max_nesting_level);
   base::Optional<uint8_t> ReadByte();
   base::Optional<base::span<const uint8_t>> ReadBytes(uint64_t num_bytes);
-  // TODO(crbug/879237): This function's only caller has to make a copy of a
-  // `span<uint8_t>` to satisfy this function's interface. Maybe we can make
-  // this function take a `const span<const uint8_t>` and avoid copying?
-  bool HasValidUTF8Format(const std::string& string_data);
   bool IsKeyInOrder(const Value& new_key, Value::MapValue* map);
   bool IsEncodingMinimal(uint8_t additional_bytes, uint64_t uint_data);
 
diff --git a/components/cbor/reader_unittest.cc b/components/cbor/reader_unittest.cc
index c570567..fd61c9a 100644
--- a/components/cbor/reader_unittest.cc
+++ b/components/cbor/reader_unittest.cc
@@ -1234,4 +1234,58 @@
   }
 }
 
+TEST(CBORReaderTest, AllowInvalidUTF8) {
+  static const uint8_t kInvalidUTF8[] = {
+      // clang-format off
+      0xa1,                    // map of length 1
+        0x61, 'x',             // "x"
+        0x81,                  // array of length 1
+          0xa2,                // map of length 2
+            0x61, 'y',         // "y"
+            0x62, 0xe2, 0x80,  // invalid UTF-8 value
+            0x61, 'z',         // "z"
+            0x61, '.',         // "."
+      // clang-format on
+  };
+
+  Reader::DecoderError error;
+  Reader::Config config;
+  config.error_code_out = &error;
+
+  base::Optional<Value> cbor = Reader::Read(kInvalidUTF8, config);
+  EXPECT_FALSE(cbor);
+  EXPECT_EQ(Reader::DecoderError::INVALID_UTF8, error);
+
+  cbor = Reader::Read(kInvalidUTF8, config);
+  EXPECT_FALSE(cbor);
+  EXPECT_EQ(Reader::DecoderError::INVALID_UTF8, error);
+
+  config.allow_invalid_utf8 = true;
+
+  cbor = Reader::Read(kInvalidUTF8, config);
+  EXPECT_TRUE(cbor);
+  EXPECT_EQ(Reader::DecoderError::CBOR_NO_ERROR, error);
+  const cbor::Value& invalid_value = cbor->GetMap()
+                                         .find(Value("x"))
+                                         ->second.GetArray()[0]
+                                         .GetMap()
+                                         .find(Value("y"))
+                                         ->second;
+  ASSERT_TRUE(invalid_value.is_invalid_utf8());
+  EXPECT_EQ(std::vector<uint8_t>({0xe2, 0x80}), invalid_value.GetInvalidUTF8());
+
+  static const uint8_t kInvalidUTF8InMapKey[] = {
+      // clang-format off
+      0xa1,                    // map of length 1
+        0x62, 0xe2, 0x80,      // invalid UTF-8 map key
+        0x61, '.',             // "."
+      // clang-format on
+  };
+
+  EXPECT_TRUE(config.allow_invalid_utf8);
+  cbor = Reader::Read(kInvalidUTF8InMapKey, config);
+  EXPECT_FALSE(cbor);
+  EXPECT_EQ(Reader::DecoderError::INVALID_UTF8, error);
+}
+
 }  // namespace cbor
diff --git a/components/cbor/values.cc b/components/cbor/values.cc
index f495a7f..9a2cd99 100644
--- a/components/cbor/values.cc
+++ b/components/cbor/values.cc
@@ -26,6 +26,7 @@
     case Type::NEGATIVE:
       integer_value_ = 0;
       return;
+    case Type::INVALID_UTF8:
     case Type::BYTE_STRING:
       new (&bytestring_value_) BinaryValue();
       return;
@@ -71,6 +72,11 @@
     : type_(Type::BYTE_STRING),
       bytestring_value_(in_bytes.begin(), in_bytes.end()) {}
 
+Value::Value(base::span<const uint8_t> in_bytes, Type type)
+    : type_(type), bytestring_value_(in_bytes.begin(), in_bytes.end()) {
+  DCHECK(type_ == Type::BYTE_STRING || type_ == Type::INVALID_UTF8);
+}
+
 Value::Value(BinaryValue&& in_bytes) noexcept
     : type_(Type::BYTE_STRING), bytestring_value_(std::move(in_bytes)) {}
 
@@ -143,6 +149,8 @@
   switch (type_) {
     case Type::NONE:
       return Value();
+    case Type::INVALID_UTF8:
+      return Value(bytestring_value_, Type::INVALID_UTF8);
     case Type::UNSIGNED:
     case Type::NEGATIVE:
       return Value(integer_value_);
@@ -220,6 +228,11 @@
   return map_value_;
 }
 
+const Value::BinaryValue& Value::GetInvalidUTF8() const {
+  CHECK(is_invalid_utf8());
+  return bytestring_value_;
+}
+
 void Value::InternalMoveConstructFrom(Value&& that) {
   type_ = that.type_;
 
@@ -228,6 +241,7 @@
     case Type::NEGATIVE:
       integer_value_ = that.integer_value_;
       return;
+    case Type::INVALID_UTF8:
     case Type::BYTE_STRING:
       new (&bytestring_value_) BinaryValue(std::move(that.bytestring_value_));
       return;
@@ -255,6 +269,7 @@
 void Value::InternalCleanup() {
   switch (type_) {
     case Type::BYTE_STRING:
+    case Type::INVALID_UTF8:
       bytestring_value_.~BinaryValue();
       break;
     case Type::STRING:
diff --git a/components/cbor/values.h b/components/cbor/values.h
index ed9aae2..c6c5ee61 100644
--- a/components/cbor/values.h
+++ b/components/cbor/values.h
@@ -94,6 +94,7 @@
     TAG = 6,
     SIMPLE_VALUE = 7,
     NONE = -1,
+    INVALID_UTF8 = -2,
   };
 
   enum class SimpleValue {
@@ -142,6 +143,7 @@
   // Returns true if the current object represents a given type.
   bool is_type(Type type) const { return type == type_; }
   bool is_none() const { return type() == Type::NONE; }
+  bool is_invalid_utf8() const { return type() == Type::INVALID_UTF8; }
   bool is_simple() const { return type() == Type::SIMPLE_VALUE; }
   bool is_bool() const {
     return is_simple() && (simple_value_ == SimpleValue::TRUE_VALUE ||
@@ -167,8 +169,14 @@
   const std::string& GetString() const;
   const ArrayValue& GetArray() const;
   const MapValue& GetMap() const;
+  const BinaryValue& GetInvalidUTF8() const;
 
  private:
+  friend class Reader;
+  // This constructor allows INVALID_UTF8 values to be created, which only
+  // |Reader| may do.
+  Value(base::span<const uint8_t> in_bytes, Type type);
+
   Type type_;
 
   union {
diff --git a/components/cbor/writer.cc b/components/cbor/writer.cc
index a522eff..eebc847 100644
--- a/components/cbor/writer.cc
+++ b/components/cbor/writer.cc
@@ -36,6 +36,10 @@
       return true;
     }
 
+    case Value::Type::INVALID_UTF8:
+      NOTREACHED() << constants::kUnsupportedMajorType;
+      return false;
+
     // Represents unsigned integers.
     case Value::Type::UNSIGNED: {
       int64_t value = node.GetUnsigned();