device/fido: handle user-info truncation.
CTAP2 authenticators are permitted to truncate |name| and |displayName|
fields at 64 bytes, and CTAP2 Yubikeys actually do so. However, this
truncation is done without any reference to the UTF-8 structure thus can
truncate in the middle of a multi-byte code-point. This makes future
responses from the authenticator potentially invalid CBOR because the
strings within are invalid UTF-8.
This change adds a facility to components/cbor to allow users of the
parser to indicate that invalid UTF-8 (except in map keys) should be
returned as Values of type |INVALID_UTF8|.
This change also uses this facility when processing CTAP2 GetAssertion
responses to allow precisely the |name| and |displayName| fields to be
truncated at (or after) 64 bytes. Any partial, multi-byte sequence at
the end of the invalid UTF-8 is dropped.
Bug: 941120
Change-Id: I1742128ee1e689fe400083404f17efc8afd55a97
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1573091
Commit-Queue: Adam Langley <[email protected]>
Reviewed-by: Chris Palmer <[email protected]>
Reviewed-by: Balazs Engedy <[email protected]>
Auto-Submit: Adam Langley <[email protected]>
Cr-Commit-Position: refs/heads/master@{#652977}
diff --git a/components/cbor/diagnostic_writer.cc b/components/cbor/diagnostic_writer.cc
index 870f6ba..04983e1 100644
--- a/components/cbor/diagnostic_writer.cc
+++ b/components/cbor/diagnostic_writer.cc
@@ -18,6 +18,23 @@
namespace cbor {
+static bool AppendHex(const std::vector<uint8_t> bytes,
+ char type_char,
+ size_t rough_max_output_bytes,
+ std::string* s) {
+ s->push_back(type_char);
+ s->push_back('\'');
+
+ if (ClampAdd(s->size(), ClampMul(2u, bytes.size())) >
+ rough_max_output_bytes) {
+ return false;
+ }
+ s->append(base::HexEncode(bytes.data(), bytes.size()));
+
+ s->push_back('\'');
+ return true;
+}
+
static bool Serialize(const Value& node,
size_t rough_max_output_bytes,
std::string* s) {
@@ -30,19 +47,17 @@
s->append(base::NumberToString(node.GetNegative()));
break;
- case Value::Type::BYTE_STRING: {
- s->append("h'");
-
- const std::vector<uint8_t>& bytes = node.GetBytestring();
- if (ClampAdd(s->size(), ClampMul(2u, bytes.size())) >
- rough_max_output_bytes) {
+ case Value::Type::INVALID_UTF8:
+ if (!AppendHex(node.GetInvalidUTF8(), 's', rough_max_output_bytes, s)) {
return false;
}
- s->append(base::HexEncode(bytes.data(), bytes.size()));
-
- s->push_back('\'');
break;
- }
+
+ case Value::Type::BYTE_STRING:
+ if (!AppendHex(node.GetBytestring(), 'h', rough_max_output_bytes, s)) {
+ return false;
+ }
+ break;
case Value::Type::STRING: {
std::string quoted_and_escaped;
diff --git a/components/cbor/diagnostic_writer_unittest.cc b/components/cbor/diagnostic_writer_unittest.cc
index c804ce0..a45fbd0 100644
--- a/components/cbor/diagnostic_writer_unittest.cc
+++ b/components/cbor/diagnostic_writer_unittest.cc
@@ -3,6 +3,8 @@
// found in the LICENSE file.
#include "components/cbor/diagnostic_writer.h"
+
+#include "components/cbor/reader.h"
#include "components/cbor/values.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -58,4 +60,15 @@
3u);
}
+TEST(CBORDiagnosticWriterTest, InvalidUTF8) {
+ static const uint8_t kInvalidUTF8[] = {0x62, 0xe2, 0x80};
+ cbor::Reader::Config config;
+ config.allow_invalid_utf8 = true;
+ base::Optional<cbor::Value> maybe_value =
+ cbor::Reader::Read(kInvalidUTF8, config);
+
+ ASSERT_TRUE(maybe_value);
+ EXPECT_EQ("s'E280'", DiagnosticWriter::Write(*maybe_value));
+}
+
} // namespace cbor
diff --git a/components/cbor/reader.cc b/components/cbor/reader.cc
index dbab268..db6c03ef 100644
--- a/components/cbor/reader.cc
+++ b/components/cbor/reader.cc
@@ -61,6 +61,9 @@
} // namespace
+Reader::Config::Config() = default;
+Reader::Config::~Config() = default;
+
Reader::Reader(base::span<const uint8_t> data)
: rest_(data), error_code_(DecoderError::CBOR_NO_ERROR) {}
Reader::~Reader() {}
@@ -69,18 +72,11 @@
base::Optional<Value> Reader::Read(base::span<uint8_t const> data,
DecoderError* error_code_out,
int max_nesting_level) {
- size_t num_bytes_consumed;
- auto value =
- Read(data, &num_bytes_consumed, error_code_out, max_nesting_level);
+ Config config;
+ config.error_code_out = error_code_out;
+ config.max_nesting_level = max_nesting_level;
- if (value && num_bytes_consumed != data.size()) {
- if (error_code_out) {
- *error_code_out = DecoderError::EXTRANEOUS_DATA;
- }
- return base::nullopt;
- }
-
- return value;
+ return Read(data, config);
}
// static
@@ -88,24 +84,44 @@
size_t* num_bytes_consumed,
DecoderError* error_code_out,
int max_nesting_level) {
+ DCHECK(num_bytes_consumed);
+
+ Config config;
+ config.num_bytes_consumed = num_bytes_consumed;
+ config.error_code_out = error_code_out;
+ config.max_nesting_level = max_nesting_level;
+
+ return Read(data, config);
+}
+
+// static
+base::Optional<Value> Reader::Read(base::span<uint8_t const> data,
+ const Config& config) {
Reader reader(data);
base::Optional<Value> value =
- reader.DecodeCompleteDataItem(max_nesting_level);
+ reader.DecodeCompleteDataItem(config, config.max_nesting_level);
auto error = reader.GetErrorCode();
const bool success = value.has_value();
DCHECK_EQ(success, error == DecoderError::CBOR_NO_ERROR);
- if (error_code_out) {
- *error_code_out = error;
+ if (config.num_bytes_consumed) {
+ *config.num_bytes_consumed =
+ success ? data.size() - reader.num_bytes_remaining() : 0;
+ } else if (success && reader.num_bytes_remaining() > 0) {
+ error = DecoderError::EXTRANEOUS_DATA;
+ value.reset();
}
- *num_bytes_consumed =
- success ? data.size() - reader.num_bytes_remaining() : 0;
+ if (config.error_code_out) {
+ *config.error_code_out = error;
+ }
+
return value;
}
-base::Optional<Value> Reader::DecodeCompleteDataItem(int max_nesting_level) {
+base::Optional<Value> Reader::DecodeCompleteDataItem(const Config& config,
+ int max_nesting_level) {
if (max_nesting_level < 0 || max_nesting_level > kCBORMaxDepth) {
error_code_ = DecoderError::TOO_MUCH_NESTING;
return base::nullopt;
@@ -124,15 +140,16 @@
case Value::Type::BYTE_STRING:
return ReadByteStringContent(*header);
case Value::Type::STRING:
- return ReadStringContent(*header);
+ return ReadStringContent(*header, config);
case Value::Type::ARRAY:
- return ReadArrayContent(*header, max_nesting_level);
+ return ReadArrayContent(*header, config, max_nesting_level);
case Value::Type::MAP:
- return ReadMapContent(*header, max_nesting_level);
+ return ReadMapContent(*header, config, max_nesting_level);
case Value::Type::SIMPLE_VALUE:
return DecodeToSimpleValue(*header);
case Value::Type::TAG: // We explicitly don't support TAG.
case Value::Type::NONE:
+ case Value::Type::INVALID_UTF8:
break;
}
@@ -238,7 +255,8 @@
}
base::Optional<Value> Reader::ReadStringContent(
- const Reader::DataItemHeader& header) {
+ const Reader::DataItemHeader& header,
+ const Config& config) {
uint64_t num_bytes = header.value;
const base::Optional<base::span<const uint8_t>> bytes = ReadBytes(num_bytes);
if (!bytes) {
@@ -246,10 +264,16 @@
}
std::string cbor_string(bytes->begin(), bytes->end());
+ if (base::IsStringUTF8(cbor_string)) {
+ return Value(std::move(cbor_string));
+ }
- return HasValidUTF8Format(cbor_string)
- ? base::make_optional<Value>(Value(std::move(cbor_string)))
- : base::nullopt;
+ if (config.allow_invalid_utf8) {
+ return Value(*bytes, Value::Type::INVALID_UTF8);
+ }
+
+ error_code_ = DecoderError::INVALID_UTF8;
+ return base::nullopt;
}
base::Optional<Value> Reader::ReadByteStringContent(
@@ -266,13 +290,14 @@
base::Optional<Value> Reader::ReadArrayContent(
const Reader::DataItemHeader& header,
+ const Config& config,
int max_nesting_level) {
const uint64_t length = header.value;
Value::ArrayValue cbor_array;
for (uint64_t i = 0; i < length; ++i) {
base::Optional<Value> cbor_element =
- DecodeCompleteDataItem(max_nesting_level - 1);
+ DecodeCompleteDataItem(config, max_nesting_level - 1);
if (!cbor_element.has_value()) {
return base::nullopt;
}
@@ -283,13 +308,16 @@
base::Optional<Value> Reader::ReadMapContent(
const Reader::DataItemHeader& header,
+ const Config& config,
int max_nesting_level) {
const uint64_t length = header.value;
Value::MapValue cbor_map;
for (uint64_t i = 0; i < length; ++i) {
- base::Optional<Value> key = DecodeCompleteDataItem(max_nesting_level - 1);
- base::Optional<Value> value = DecodeCompleteDataItem(max_nesting_level - 1);
+ base::Optional<Value> key =
+ DecodeCompleteDataItem(config, max_nesting_level - 1);
+ base::Optional<Value> value =
+ DecodeCompleteDataItem(config, max_nesting_level - 1);
if (!key.has_value() || !value.has_value()) {
return base::nullopt;
}
@@ -300,6 +328,9 @@
case Value::Type::STRING:
case Value::Type::BYTE_STRING:
break;
+ case Value::Type::INVALID_UTF8:
+ error_code_ = DecoderError::INVALID_UTF8;
+ return base::nullopt;
default:
error_code_ = DecoderError::INCORRECT_MAP_KEY_TYPE;
return base::nullopt;
@@ -338,14 +369,6 @@
return true;
}
-bool Reader::HasValidUTF8Format(const std::string& string_data) {
- if (!base::IsStringUTF8(string_data)) {
- error_code_ = DecoderError::INVALID_UTF8;
- return false;
- }
- return true;
-}
-
bool Reader::IsKeyInOrder(const Value& new_key, Value::MapValue* map) {
if (map->empty()) {
return true;
diff --git a/components/cbor/reader.h b/components/cbor/reader.h
index 6cf58873b..25647f82 100644
--- a/components/cbor/reader.h
+++ b/components/cbor/reader.h
@@ -79,6 +79,35 @@
// CBOR nested depth sufficient for most use cases.
static const int kCBORMaxDepth = 16;
+ // Config contains configuration for a CBOR parsing operation.
+ struct CBOR_EXPORT Config {
+ Config();
+ ~Config();
+
+ // num_bytes_consumed, if not nullptr, is used to report the number of bytes
+ // of input consumed. This suppresses the |EXTRANEOUS_DATA| error case.
+ size_t* num_bytes_consumed = nullptr;
+ // error_code_out, if not nullptr, is used to report the specific error in
+ // the case that parsing fails.
+ DecoderError* error_code_out = nullptr;
+ // max_nesting_level controls the maximum depth of CBOR nesting that will be
+ // permitted. This exists to control stack consumption during parsing.
+ int max_nesting_level = kCBORMaxDepth;
+ // allow_invalid_utf8 causes strings that are not valid UTF-8 to be accepted
+ // and suppresses the |INVALID_UTF8| error, unless such strings are map
+ // keys. Invalid strings will result in Values of type |INVALID_UTF8| rather
+ // than |STRING|. Users of this feature should ensure that every invalid
+ // string is accounted for in the resulting structure.
+ //
+ // (Map keys are not allowed to be invalid because it was not necessary for
+ // the motivating case and because it adds complexity to handle the
+ // ordering correctly.)
+ bool allow_invalid_utf8 = false;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Config);
+ };
+
~Reader();
// Reads and parses |input_data| into a Value. Returns an empty Optional
@@ -97,8 +126,13 @@
DecoderError* error_code_out = nullptr,
int max_nesting_level = kCBORMaxDepth);
- // Never fails with EXTRANEOUS_DATA, but informs the caller of how many bytes
- // were consumed through |num_bytes_consumed|.
+ // A version of |Read|, above, that takes a |Config| structure to allow
+ // additional controls.
+ static base::Optional<Value> Read(base::span<const uint8_t> input_data,
+ const Config& config);
+
+ // A version of |Read| that takes some fields of |Config| as parameters to
+ // avoid having to construct a |Config| object explicitly.
static base::Optional<Value> Read(base::span<const uint8_t> input_data,
size_t* num_bytes_consumed,
DecoderError* error_code_out = nullptr,
@@ -126,23 +160,23 @@
};
base::Optional<DataItemHeader> DecodeDataItemHeader();
- base::Optional<Value> DecodeCompleteDataItem(int max_nesting_level);
+ base::Optional<Value> DecodeCompleteDataItem(const Config& config,
+ int max_nesting_level);
base::Optional<Value> DecodeValueToNegative(uint64_t value);
base::Optional<Value> DecodeValueToUnsigned(uint64_t value);
base::Optional<Value> DecodeToSimpleValue(const DataItemHeader& header);
base::Optional<uint64_t> ReadVariadicLengthInteger(uint8_t additional_info);
base::Optional<Value> ReadByteStringContent(const DataItemHeader& header);
- base::Optional<Value> ReadStringContent(const DataItemHeader& header);
+ base::Optional<Value> ReadStringContent(const DataItemHeader& header,
+ const Config& config);
base::Optional<Value> ReadArrayContent(const DataItemHeader& header,
+ const Config& config,
int max_nesting_level);
base::Optional<Value> ReadMapContent(const DataItemHeader& header,
+ const Config& config,
int max_nesting_level);
base::Optional<uint8_t> ReadByte();
base::Optional<base::span<const uint8_t>> ReadBytes(uint64_t num_bytes);
- // TODO(crbug/879237): This function's only caller has to make a copy of a
- // `span<uint8_t>` to satisfy this function's interface. Maybe we can make
- // this function take a `const span<const uint8_t>` and avoid copying?
- bool HasValidUTF8Format(const std::string& string_data);
bool IsKeyInOrder(const Value& new_key, Value::MapValue* map);
bool IsEncodingMinimal(uint8_t additional_bytes, uint64_t uint_data);
diff --git a/components/cbor/reader_unittest.cc b/components/cbor/reader_unittest.cc
index c570567..fd61c9a 100644
--- a/components/cbor/reader_unittest.cc
+++ b/components/cbor/reader_unittest.cc
@@ -1234,4 +1234,58 @@
}
}
+TEST(CBORReaderTest, AllowInvalidUTF8) {
+ static const uint8_t kInvalidUTF8[] = {
+ // clang-format off
+ 0xa1, // map of length 1
+ 0x61, 'x', // "x"
+ 0x81, // array of length 1
+ 0xa2, // map of length 2
+ 0x61, 'y', // "y"
+ 0x62, 0xe2, 0x80, // invalid UTF-8 value
+ 0x61, 'z', // "z"
+ 0x61, '.', // "."
+ // clang-format on
+ };
+
+ Reader::DecoderError error;
+ Reader::Config config;
+ config.error_code_out = &error;
+
+ base::Optional<Value> cbor = Reader::Read(kInvalidUTF8, config);
+ EXPECT_FALSE(cbor);
+ EXPECT_EQ(Reader::DecoderError::INVALID_UTF8, error);
+
+ cbor = Reader::Read(kInvalidUTF8, config);
+ EXPECT_FALSE(cbor);
+ EXPECT_EQ(Reader::DecoderError::INVALID_UTF8, error);
+
+ config.allow_invalid_utf8 = true;
+
+ cbor = Reader::Read(kInvalidUTF8, config);
+ EXPECT_TRUE(cbor);
+ EXPECT_EQ(Reader::DecoderError::CBOR_NO_ERROR, error);
+ const cbor::Value& invalid_value = cbor->GetMap()
+ .find(Value("x"))
+ ->second.GetArray()[0]
+ .GetMap()
+ .find(Value("y"))
+ ->second;
+ ASSERT_TRUE(invalid_value.is_invalid_utf8());
+ EXPECT_EQ(std::vector<uint8_t>({0xe2, 0x80}), invalid_value.GetInvalidUTF8());
+
+ static const uint8_t kInvalidUTF8InMapKey[] = {
+ // clang-format off
+ 0xa1, // map of length 1
+ 0x62, 0xe2, 0x80, // invalid UTF-8 map key
+ 0x61, '.', // "."
+ // clang-format on
+ };
+
+ EXPECT_TRUE(config.allow_invalid_utf8);
+ cbor = Reader::Read(kInvalidUTF8InMapKey, config);
+ EXPECT_FALSE(cbor);
+ EXPECT_EQ(Reader::DecoderError::INVALID_UTF8, error);
+}
+
} // namespace cbor
diff --git a/components/cbor/values.cc b/components/cbor/values.cc
index f495a7f..9a2cd99 100644
--- a/components/cbor/values.cc
+++ b/components/cbor/values.cc
@@ -26,6 +26,7 @@
case Type::NEGATIVE:
integer_value_ = 0;
return;
+ case Type::INVALID_UTF8:
case Type::BYTE_STRING:
new (&bytestring_value_) BinaryValue();
return;
@@ -71,6 +72,11 @@
: type_(Type::BYTE_STRING),
bytestring_value_(in_bytes.begin(), in_bytes.end()) {}
+Value::Value(base::span<const uint8_t> in_bytes, Type type)
+ : type_(type), bytestring_value_(in_bytes.begin(), in_bytes.end()) {
+ DCHECK(type_ == Type::BYTE_STRING || type_ == Type::INVALID_UTF8);
+}
+
Value::Value(BinaryValue&& in_bytes) noexcept
: type_(Type::BYTE_STRING), bytestring_value_(std::move(in_bytes)) {}
@@ -143,6 +149,8 @@
switch (type_) {
case Type::NONE:
return Value();
+ case Type::INVALID_UTF8:
+ return Value(bytestring_value_, Type::INVALID_UTF8);
case Type::UNSIGNED:
case Type::NEGATIVE:
return Value(integer_value_);
@@ -220,6 +228,11 @@
return map_value_;
}
+const Value::BinaryValue& Value::GetInvalidUTF8() const {
+ CHECK(is_invalid_utf8());
+ return bytestring_value_;
+}
+
void Value::InternalMoveConstructFrom(Value&& that) {
type_ = that.type_;
@@ -228,6 +241,7 @@
case Type::NEGATIVE:
integer_value_ = that.integer_value_;
return;
+ case Type::INVALID_UTF8:
case Type::BYTE_STRING:
new (&bytestring_value_) BinaryValue(std::move(that.bytestring_value_));
return;
@@ -255,6 +269,7 @@
void Value::InternalCleanup() {
switch (type_) {
case Type::BYTE_STRING:
+ case Type::INVALID_UTF8:
bytestring_value_.~BinaryValue();
break;
case Type::STRING:
diff --git a/components/cbor/values.h b/components/cbor/values.h
index ed9aae2..c6c5ee61 100644
--- a/components/cbor/values.h
+++ b/components/cbor/values.h
@@ -94,6 +94,7 @@
TAG = 6,
SIMPLE_VALUE = 7,
NONE = -1,
+ INVALID_UTF8 = -2,
};
enum class SimpleValue {
@@ -142,6 +143,7 @@
// Returns true if the current object represents a given type.
bool is_type(Type type) const { return type == type_; }
bool is_none() const { return type() == Type::NONE; }
+ bool is_invalid_utf8() const { return type() == Type::INVALID_UTF8; }
bool is_simple() const { return type() == Type::SIMPLE_VALUE; }
bool is_bool() const {
return is_simple() && (simple_value_ == SimpleValue::TRUE_VALUE ||
@@ -167,8 +169,14 @@
const std::string& GetString() const;
const ArrayValue& GetArray() const;
const MapValue& GetMap() const;
+ const BinaryValue& GetInvalidUTF8() const;
private:
+ friend class Reader;
+ // This constructor allows INVALID_UTF8 values to be created, which only
+ // |Reader| may do.
+ Value(base::span<const uint8_t> in_bytes, Type type);
+
Type type_;
union {
diff --git a/components/cbor/writer.cc b/components/cbor/writer.cc
index a522eff..eebc847 100644
--- a/components/cbor/writer.cc
+++ b/components/cbor/writer.cc
@@ -36,6 +36,10 @@
return true;
}
+ case Value::Type::INVALID_UTF8:
+ NOTREACHED() << constants::kUnsupportedMajorType;
+ return false;
+
// Represents unsigned integers.
case Value::Type::UNSIGNED: {
int64_t value = node.GetUnsigned();