Merge pull request #1934 from thomasvl/objc_strings_with_null
Never use strlen on utf8 runs so null characters work.
This commit is contained in:
commit
30bbbe99e2
@ -144,22 +144,6 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state,
|
|||||||
GPBWriteRawByte(state, (int32_t)(value >> 56) & 0xFF);
|
GPBWriteRawByte(state, (int32_t)(value >> 56) & 0xFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(DEBUG) && DEBUG && !defined(NS_BLOCK_ASSERTIONS)
|
|
||||||
+ (void)load {
|
|
||||||
// This test exists to verify that CFStrings with embedded NULLs will work
|
|
||||||
// for us. If this Assert fails, all code below that depends on
|
|
||||||
// CFStringGetCStringPtr will NOT work properly on strings that contain
|
|
||||||
// embedded NULLs, and we do get that in some protobufs.
|
|
||||||
// Note that this will not be compiled in release.
|
|
||||||
// We didn't feel that just keeping it in a unit test was sufficient because
|
|
||||||
// the Protobuf unit tests are only run when somebody is actually working
|
|
||||||
// on protobufs.
|
|
||||||
CFStringRef zeroTest = CFSTR("Test\0String");
|
|
||||||
const char *cString = CFStringGetCStringPtr(zeroTest, kCFStringEncodingUTF8);
|
|
||||||
NSAssert(cString == NULL, @"Serious Error");
|
|
||||||
}
|
|
||||||
#endif // DEBUG && !defined(NS_BLOCK_ASSERTIONS)
|
|
||||||
|
|
||||||
- (void)dealloc {
|
- (void)dealloc {
|
||||||
[self flush];
|
[self flush];
|
||||||
[state_.output close];
|
[state_.output close];
|
||||||
@ -282,19 +266,15 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state,
|
|||||||
}
|
}
|
||||||
|
|
||||||
- (void)writeStringNoTag:(const NSString *)value {
|
- (void)writeStringNoTag:(const NSString *)value {
|
||||||
// If you are concerned about embedded NULLs see the test in
|
size_t length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
|
||||||
// +load above.
|
|
||||||
const char *quickString =
|
|
||||||
CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
|
|
||||||
size_t length = (quickString != NULL)
|
|
||||||
? strlen(quickString)
|
|
||||||
: [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
|
|
||||||
GPBWriteRawVarint32(&state_, (int32_t)length);
|
GPBWriteRawVarint32(&state_, (int32_t)length);
|
||||||
|
|
||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char *quickString =
|
||||||
|
CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
|
||||||
|
|
||||||
// Fast path: Most strings are short, if the buffer already has space,
|
// Fast path: Most strings are short, if the buffer already has space,
|
||||||
// add to it directly.
|
// add to it directly.
|
||||||
NSUInteger bufferBytesLeft = state_.size - state_.position;
|
NSUInteger bufferBytesLeft = state_.size - state_.position;
|
||||||
@ -1038,14 +1018,7 @@ size_t GPBComputeBoolSizeNoTag(BOOL value) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t GPBComputeStringSizeNoTag(NSString *value) {
|
size_t GPBComputeStringSizeNoTag(NSString *value) {
|
||||||
// If you are concerned about embedded NULLs see the test in
|
NSUInteger length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
|
||||||
// +load above.
|
|
||||||
const char *quickString =
|
|
||||||
CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
|
|
||||||
NSUInteger length =
|
|
||||||
(quickString != NULL)
|
|
||||||
? strlen(quickString)
|
|
||||||
: [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
|
|
||||||
return GPBComputeRawVarint32SizeForInteger(length) + length;
|
return GPBComputeRawVarint32SizeForInteger(length) + length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1052,7 +1052,15 @@ static void AppendStringEscaped(NSString *toPrint, NSMutableString *destStr) {
|
|||||||
case '\'': [destStr appendString:@"\\\'"]; break;
|
case '\'': [destStr appendString:@"\\\'"]; break;
|
||||||
case '\\': [destStr appendString:@"\\\\"]; break;
|
case '\\': [destStr appendString:@"\\\\"]; break;
|
||||||
default:
|
default:
|
||||||
[destStr appendFormat:@"%C", aChar];
|
// This differs slightly from the C++ code in that the C++ doesn't
|
||||||
|
// generate UTF8; it looks at the string in UTF8, but escapes every
|
||||||
|
// byte > 0x7E.
|
||||||
|
if (aChar < 0x20) {
|
||||||
|
[destStr appendFormat:@"\\%d%d%d",
|
||||||
|
(aChar / 64), ((aChar % 64) / 8), (aChar % 8)];
|
||||||
|
} else {
|
||||||
|
[destStr appendFormat:@"%C", aChar];
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -193,6 +193,32 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
- (void)assertWriteStringNoTag:(NSData*)data
|
||||||
|
value:(NSString *)value
|
||||||
|
context:(NSString *)contextMessage {
|
||||||
|
NSOutputStream* rawOutput = [NSOutputStream outputStreamToMemory];
|
||||||
|
GPBCodedOutputStream* output =
|
||||||
|
[GPBCodedOutputStream streamWithOutputStream:rawOutput];
|
||||||
|
[output writeStringNoTag:value];
|
||||||
|
[output flush];
|
||||||
|
|
||||||
|
NSData* actual =
|
||||||
|
[rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
|
||||||
|
XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
|
||||||
|
|
||||||
|
// Try different block sizes.
|
||||||
|
for (int blockSize = 1; blockSize <= 16; blockSize *= 2) {
|
||||||
|
rawOutput = [NSOutputStream outputStreamToMemory];
|
||||||
|
output = [GPBCodedOutputStream streamWithOutputStream:rawOutput
|
||||||
|
bufferSize:blockSize];
|
||||||
|
[output writeStringNoTag:value];
|
||||||
|
[output flush];
|
||||||
|
|
||||||
|
actual = [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
|
||||||
|
XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
- (void)testWriteVarint1 {
|
- (void)testWriteVarint1 {
|
||||||
[self assertWriteVarint:bytes(0x00) value:0];
|
[self assertWriteVarint:bytes(0x00) value:0];
|
||||||
}
|
}
|
||||||
@ -337,4 +363,64 @@
|
|||||||
XCTAssertEqualObjects(rawBytes, goldenData);
|
XCTAssertEqualObjects(rawBytes, goldenData);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
- (void)testCFStringGetCStringPtrAndStringsWithNullChars {
|
||||||
|
// This test exists to verify that CFStrings with embedded NULLs still expose
|
||||||
|
// their raw buffer if they are backed by UTF8 storage. If this fails, the
|
||||||
|
// quick/direct access paths in GPBCodedOutputStream that depend on
|
||||||
|
// CFStringGetCStringPtr need to be re-evalutated (maybe just removed).
|
||||||
|
// And yes, we do get NULLs in strings from some servers.
|
||||||
|
|
||||||
|
char zeroTest[] = "\0Test\0String";
|
||||||
|
// Note: there is a \0 at the end of this since it is a c-string.
|
||||||
|
NSString *asNSString = [[NSString alloc] initWithBytes:zeroTest
|
||||||
|
length:sizeof(zeroTest)
|
||||||
|
encoding:NSUTF8StringEncoding];
|
||||||
|
const char *cString =
|
||||||
|
CFStringGetCStringPtr((CFStringRef)asNSString, kCFStringEncodingUTF8);
|
||||||
|
XCTAssertTrue(cString != NULL);
|
||||||
|
// Again, if the above assert fails, then it means NSString no longer exposes
|
||||||
|
// the raw utf8 storage of a string created from utf8 input, so the code using
|
||||||
|
// CFStringGetCStringPtr in GPBCodedOutputStream will still work (it will take
|
||||||
|
// a different code path); but the optimizations for when
|
||||||
|
// CFStringGetCStringPtr does work could possibly go away.
|
||||||
|
|
||||||
|
XCTAssertEqual(sizeof(zeroTest),
|
||||||
|
[asNSString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]);
|
||||||
|
XCTAssertTrue(0 == memcmp(cString, zeroTest, sizeof(zeroTest)));
|
||||||
|
[asNSString release];
|
||||||
|
}
|
||||||
|
|
||||||
|
- (void)testWriteStringsWithZeroChar {
|
||||||
|
// Unicode allows `\0` as a character, and NSString is a class cluster, so
|
||||||
|
// there are a few different classes that could end up beind a given string.
|
||||||
|
// Historically, we've seen differences based on constant strings in code and
|
||||||
|
// strings built via the NSString apis. So this round trips them to ensure
|
||||||
|
// they are acting as expected.
|
||||||
|
|
||||||
|
NSArray<NSString *> *strs = @[
|
||||||
|
@"\0at start",
|
||||||
|
@"in\0middle",
|
||||||
|
@"at end\0",
|
||||||
|
];
|
||||||
|
int i = 0;
|
||||||
|
for (NSString *str in strs) {
|
||||||
|
NSData *asUTF8 = [str dataUsingEncoding:NSUTF8StringEncoding];
|
||||||
|
NSMutableData *expected = [NSMutableData data];
|
||||||
|
uint8_t lengthByte = (uint8_t)asUTF8.length;
|
||||||
|
[expected appendBytes:&lengthByte length:1];
|
||||||
|
[expected appendData:asUTF8];
|
||||||
|
|
||||||
|
NSString *context = [NSString stringWithFormat:@"Loop %d - Literal", i];
|
||||||
|
[self assertWriteStringNoTag:expected value:str context:context];
|
||||||
|
|
||||||
|
// Force a new string to be built which gets a different class from the
|
||||||
|
// NSString class cluster than the literal did.
|
||||||
|
NSString *str2 = [NSString stringWithFormat:@"%@", str];
|
||||||
|
context = [NSString stringWithFormat:@"Loop %d - Built", i];
|
||||||
|
[self assertWriteStringNoTag:expected value:str2 context:context];
|
||||||
|
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
Loading…
Reference in New Issue
Block a user