Never use strlen on utf8 runs so null characters work.

Fixes https://github.com/google/protobuf/issues/1933

Add a new test that forces strings into two different implementations from the
NSString class cluster to help confirm we're exercising both paths by which
CodedOutputStream will extract data from an NSString.

Move the old +load test (that was flawed because the behavior really depends on
the type of string from the NSString class cluster); into a unittest that
targets the specific case we're adding a behavior confirmation on.

As a bonus, improve the TextFormat generation of string characters < 0x20.
This commit is contained in:
Thomas Van Lenten 2016-08-08 18:02:43 -04:00
parent 237f321e33
commit 1a6c1d092d
3 changed files with 100 additions and 33 deletions

View File

@ -144,22 +144,6 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state,
GPBWriteRawByte(state, (int32_t)(value >> 56) & 0xFF);
}
#if defined(DEBUG) && DEBUG && !defined(NS_BLOCK_ASSERTIONS)
+ (void)load {
// This test exists to verify that CFStrings with embedded NULLs will work
// for us. If this Assert fails, all code below that depends on
// CFStringGetCStringPtr will NOT work properly on strings that contain
// embedded NULLs, and we do get that in some protobufs.
// Note that this will not be compiled in release.
// We didn't feel that just keeping it in a unit test was sufficient because
// the Protobuf unit tests are only run when somebody is actually working
// on protobufs.
CFStringRef zeroTest = CFSTR("Test\0String");
const char *cString = CFStringGetCStringPtr(zeroTest, kCFStringEncodingUTF8);
NSAssert(cString == NULL, @"Serious Error");
}
#endif // DEBUG && !defined(NS_BLOCK_ASSERTIONS)
- (void)dealloc {
[self flush];
[state_.output close];
@ -282,19 +266,15 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state,
}
- (void)writeStringNoTag:(const NSString *)value {
// If you are concerned about embedded NULLs see the test in
// +load above.
const char *quickString =
CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
size_t length = (quickString != NULL)
? strlen(quickString)
: [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
size_t length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
GPBWriteRawVarint32(&state_, (int32_t)length);
if (length == 0) {
return;
}
const char *quickString =
CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
// Fast path: Most strings are short, if the buffer already has space,
// add to it directly.
NSUInteger bufferBytesLeft = state_.size - state_.position;
@ -1038,14 +1018,7 @@ size_t GPBComputeBoolSizeNoTag(BOOL value) {
}
size_t GPBComputeStringSizeNoTag(NSString *value) {
// If you are concerned about embedded NULLs see the test in
// +load above.
const char *quickString =
CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
NSUInteger length =
(quickString != NULL)
? strlen(quickString)
: [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
NSUInteger length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
return GPBComputeRawVarint32SizeForInteger(length) + length;
}

View File

@ -1052,7 +1052,15 @@ static void AppendStringEscaped(NSString *toPrint, NSMutableString *destStr) {
case '\'': [destStr appendString:@"\\\'"]; break;
case '\\': [destStr appendString:@"\\\\"]; break;
default:
[destStr appendFormat:@"%C", aChar];
// This differs slightly from the C++ code in that the C++ doesn't
// generate UTF8; it looks at the string in UTF8, but escapes every
// byte > 0x7E.
if (aChar < 0x20) {
[destStr appendFormat:@"\\%d%d%d",
(aChar / 64), ((aChar % 64) / 8), (aChar % 8)];
} else {
[destStr appendFormat:@"%C", aChar];
}
break;
}
}

View File

@ -193,6 +193,32 @@
}
}
- (void)assertWriteStringNoTag:(NSData*)data
value:(NSString *)value
context:(NSString *)contextMessage {
NSOutputStream* rawOutput = [NSOutputStream outputStreamToMemory];
GPBCodedOutputStream* output =
[GPBCodedOutputStream streamWithOutputStream:rawOutput];
[output writeStringNoTag:value];
[output flush];
NSData* actual =
[rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
// Try different block sizes.
for (int blockSize = 1; blockSize <= 16; blockSize *= 2) {
rawOutput = [NSOutputStream outputStreamToMemory];
output = [GPBCodedOutputStream streamWithOutputStream:rawOutput
bufferSize:blockSize];
[output writeStringNoTag:value];
[output flush];
actual = [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
}
}
- (void)testWriteVarint1 {
[self assertWriteVarint:bytes(0x00) value:0];
}
@ -337,4 +363,64 @@
XCTAssertEqualObjects(rawBytes, goldenData);
}
- (void)testCFStringGetCStringPtrAndStringsWithNullChars {
// This test exists to verify that CFStrings with embedded NULLs still expose
// their raw buffer if they are backed by UTF8 storage. If this fails, the
// quick/direct access paths in GPBCodedOutputStream that depend on
// CFStringGetCStringPtr need to be re-evalutated (maybe just removed).
// And yes, we do get NULLs in strings from some servers.
char zeroTest[] = "\0Test\0String";
// Note: there is a \0 at the end of this since it is a c-string.
NSString *asNSString = [[NSString alloc] initWithBytes:zeroTest
length:sizeof(zeroTest)
encoding:NSUTF8StringEncoding];
const char *cString =
CFStringGetCStringPtr((CFStringRef)asNSString, kCFStringEncodingUTF8);
XCTAssertTrue(cString != NULL);
// Again, if the above assert fails, then it means NSString no longer exposes
// the raw utf8 storage of a string created from utf8 input, so the code using
// CFStringGetCStringPtr in GPBCodedOutputStream will still work (it will take
// a different code path); but the optimizations for when
// CFStringGetCStringPtr does work could possibly go away.
XCTAssertEqual(sizeof(zeroTest),
[asNSString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]);
XCTAssertTrue(0 == memcmp(cString, zeroTest, sizeof(zeroTest)));
[asNSString release];
}
- (void)testWriteStringsWithZeroChar {
// Unicode allows `\0` as a character, and NSString is a class cluster, so
// there are a few different classes that could end up beind a given string.
// Historically, we've seen differences based on constant strings in code and
// strings built via the NSString apis. So this round trips them to ensure
// they are acting as expected.
NSArray<NSString *> *strs = @[
@"\0at start",
@"in\0middle",
@"at end\0",
];
int i = 0;
for (NSString *str in strs) {
NSData *asUTF8 = [str dataUsingEncoding:NSUTF8StringEncoding];
NSMutableData *expected = [NSMutableData data];
uint8_t lengthByte = (uint8_t)asUTF8.length;
[expected appendBytes:&lengthByte length:1];
[expected appendData:asUTF8];
NSString *context = [NSString stringWithFormat:@"Loop %d - Literal", i];
[self assertWriteStringNoTag:expected value:str context:context];
// Force a new string to be built which gets a different class from the
// NSString class cluster than the literal did.
NSString *str2 = [NSString stringWithFormat:@"%@", str];
context = [NSString stringWithFormat:@"Loop %d - Built", i];
[self assertWriteStringNoTag:expected value:str2 context:context];
++i;
}
}
@end