Updated PHP upb, and utf8_range for Ruby/PHP (#9585)

* Updated PHP to the new version of upb.

This is a large change, as the upb API surface has been
renamed to follow Google style more closely.

* Fixed utf8_range.

* Updated Ruby for new utf8_range.

* Picked up new upb for PHP, with spelling fixes.

* Fixed the 32-bit build.
This commit is contained in:
Joshua Haberman 2022-03-05 11:55:57 -08:00 committed by GitHub
parent 0a246e23f8
commit 349d74d92e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 8994 additions and 6419 deletions

View File

@ -38,7 +38,7 @@
typedef struct Arena {
zend_object std;
upb_arena* arena;
upb_Arena* arena;
} Arena;
zend_class_entry *Arena_class_entry;
@ -50,14 +50,14 @@ static zend_object* Arena_Create(zend_class_entry *class_type) {
Arena *intern = emalloc(sizeof(Arena));
zend_object_std_init(&intern->std, class_type);
intern->std.handlers = &Arena_object_handlers;
intern->arena = upb_arena_new();
intern->arena = upb_Arena_New();
// Skip object_properties_init(), we don't allow derived classes.
return &intern->std;
}
static void Arena_Free(zend_object* obj) {
Arena* intern = (Arena*)obj;
upb_arena_free(intern->arena);
upb_Arena_Free(intern->arena);
zend_object_std_dtor(&intern->std);
}
@ -67,7 +67,7 @@ void Arena_Init(zval* val) {
ZVAL_OBJ(val, Arena_Create(Arena_class_entry));
}
upb_arena *Arena_Get(zval *val) {
upb_Arena *Arena_Get(zval *val) {
Arena *a = (Arena*)Z_OBJ_P(val);
return a->arena;
}

View File

@ -38,10 +38,10 @@
// Registers the PHP Arena class.
void Arena_ModuleInit();
// Creates and returns a new arena object that wraps a new upb_arena*.
// Creates and returns a new arena object that wraps a new upb_Arena*.
void Arena_Init(zval *val);
// Gets the underlying upb_arena from this arena object.
upb_arena *Arena_Get(zval *arena);
// Gets the underlying upb_Arena from this arena object.
upb_Arena *Arena_Get(zval *arena);
#endif // PHP_PROTOBUF_ARENA_H_

View File

@ -54,7 +54,7 @@ static void RepeatedFieldIter_make(zval *val, zval *repeated_field);
typedef struct {
zend_object std;
zval arena;
upb_array *array;
upb_Array *array;
TypeInfo type;
} RepeatedField;
@ -120,14 +120,14 @@ static int RepeatedField_compare_objects(zval *rf1, zval *rf2) {
*/
static zend_object *RepeatedField_clone_obj(PROTO_VAL *object) {
RepeatedField* intern = PROTO_VAL_P(object);
upb_arena *arena = Arena_Get(&intern->arena);
upb_array *clone = upb_array_new(arena, intern->type.type);
size_t n = upb_array_size(intern->array);
upb_Arena *arena = Arena_Get(&intern->arena);
upb_Array *clone = upb_Array_New(arena, intern->type.type);
size_t n = upb_Array_Size(intern->array);
size_t i;
for (i = 0; i < n; i++) {
upb_msgval msgval = upb_array_get(intern->array, i);
upb_array_append(clone, msgval, arena);
upb_MessageValue msgval = upb_Array_Get(intern->array, i);
upb_Array_Append(clone, msgval, arena);
}
zval ret;
@ -149,7 +149,7 @@ static zval *RepeatedField_GetPropertyPtrPtr(PROTO_VAL *object,
// These are documented in the header file.
void RepeatedField_GetPhpWrapper(zval *val, upb_array *arr, TypeInfo type,
void RepeatedField_GetPhpWrapper(zval *val, upb_Array *arr, TypeInfo type,
zval *arena) {
if (!arr) {
ZVAL_NULL(val);
@ -169,15 +169,15 @@ void RepeatedField_GetPhpWrapper(zval *val, upb_array *arr, TypeInfo type,
}
}
upb_array *RepeatedField_GetUpbArray(zval *val, TypeInfo type,
upb_arena *arena) {
upb_Array *RepeatedField_GetUpbArray(zval *val, TypeInfo type,
upb_Arena *arena) {
if (Z_ISREF_P(val)) {
ZVAL_DEREF(val);
}
if (Z_TYPE_P(val) == IS_ARRAY) {
// Auto-construct, eg. [1, 2, 3] -> upb_array([1, 2, 3]).
upb_array *arr = upb_array_new(arena, type.type);
// Auto-construct, eg. [1, 2, 3] -> upb_Array([1, 2, 3]).
upb_Array *arr = upb_Array_New(arena, type.type);
HashTable *table = HASH_OF(val);
HashPosition pos;
@ -185,7 +185,7 @@ upb_array *RepeatedField_GetUpbArray(zval *val, TypeInfo type,
while (true) {
zval *zv = zend_hash_get_current_data_ex(table, &pos);
upb_msgval val;
upb_MessageValue val;
if (!zv) return arr;
@ -193,12 +193,12 @@ upb_array *RepeatedField_GetUpbArray(zval *val, TypeInfo type,
return NULL;
}
upb_array_append(arr, val, arena);
upb_Array_Append(arr, val, arena);
zend_hash_move_forward_ex(table, &pos);
}
} else if (Z_TYPE_P(val) == IS_OBJECT &&
Z_OBJCE_P(val) == RepeatedField_class_entry) {
// Unwrap existing RepeatedField object to get the upb_array* inside.
// Unwrap existing RepeatedField object to get the upb_Array* inside.
RepeatedField *intern = (RepeatedField*)Z_OBJ_P(val);
if (!TypeInfo_Eq(intern->type, type)) {
@ -206,7 +206,7 @@ upb_array *RepeatedField_GetUpbArray(zval *val, TypeInfo type,
"Wrong type for this repeated field.");
}
upb_arena_fuse(arena, Arena_Get(&intern->arena));
upb_Arena_Fuse(arena, Arena_Get(&intern->arena));
return intern->array;
} else {
php_error_docref(NULL, E_USER_ERROR, "Must be a repeated field");
@ -214,19 +214,19 @@ upb_array *RepeatedField_GetUpbArray(zval *val, TypeInfo type,
}
}
bool ArrayEq(const upb_array *a1, const upb_array *a2, TypeInfo type) {
bool ArrayEq(const upb_Array *a1, const upb_Array *a2, TypeInfo type) {
size_t i;
size_t n;
if ((a1 == NULL) != (a2 == NULL)) return false;
if (a1 == NULL) return true;
n = upb_array_size(a1);
if (n != upb_array_size(a2)) return false;
n = upb_Array_Size(a1);
if (n != upb_Array_Size(a2)) return false;
for (i = 0; i < n; i++) {
upb_msgval val1 = upb_array_get(a1, i);
upb_msgval val2 = upb_array_get(a2, i);
upb_MessageValue val1 = upb_Array_Get(a1, i);
upb_MessageValue val2 = upb_Array_Get(a2, i);
if (!ValueEq(val1, val2, type)) return false;
}
@ -245,7 +245,7 @@ bool ArrayEq(const upb_array *a1, const upb_array *a2, TypeInfo type) {
*/
PHP_METHOD(RepeatedField, __construct) {
RepeatedField *intern = (RepeatedField*)Z_OBJ_P(getThis());
upb_arena *arena = Arena_Get(&intern->arena);
upb_Arena *arena = Arena_Get(&intern->arena);
zend_long type;
zend_class_entry* klass = NULL;
@ -256,13 +256,13 @@ PHP_METHOD(RepeatedField, __construct) {
intern->type.type = pbphp_dtype_to_type(type);
intern->type.desc = Descriptor_GetFromClassEntry(klass);
if (intern->type.type == UPB_TYPE_MESSAGE && klass == NULL) {
if (intern->type.type == kUpb_CType_Message && klass == NULL) {
php_error_docref(NULL, E_USER_ERROR,
"Message/enum type must have concrete class.");
return;
}
intern->array = upb_array_new(arena, intern->type.type);
intern->array = upb_Array_New(arena, intern->type.type);
ObjCache_Add(intern->array, &intern->std);
}
@ -274,16 +274,16 @@ PHP_METHOD(RepeatedField, __construct) {
*/
PHP_METHOD(RepeatedField, append) {
RepeatedField *intern = (RepeatedField*)Z_OBJ_P(getThis());
upb_arena *arena = Arena_Get(&intern->arena);
upb_Arena *arena = Arena_Get(&intern->arena);
zval *php_val;
upb_msgval msgval;
upb_MessageValue msgval;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &php_val) != SUCCESS ||
!Convert_PhpToUpb(php_val, &msgval, intern->type, arena)) {
return;
}
upb_array_append(intern->array, msgval, arena);
upb_Array_Append(intern->array, msgval, arena);
}
/**
@ -305,7 +305,7 @@ PHP_METHOD(RepeatedField, offsetExists) {
return;
}
RETURN_BOOL(index >= 0 && index < upb_array_size(intern->array));
RETURN_BOOL(index >= 0 && index < upb_Array_Size(intern->array));
}
/**
@ -323,19 +323,19 @@ PHP_METHOD(RepeatedField, offsetExists) {
PHP_METHOD(RepeatedField, offsetGet) {
RepeatedField *intern = (RepeatedField*)Z_OBJ_P(getThis());
zend_long index;
upb_msgval msgval;
upb_MessageValue msgval;
zval ret;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &index) == FAILURE) {
return;
}
if (index < 0 || index >= upb_array_size(intern->array)) {
if (index < 0 || index >= upb_Array_Size(intern->array)) {
zend_error(E_USER_ERROR, "Element at %ld doesn't exist.\n", index);
return;
}
msgval = upb_array_get(intern->array, index);
msgval = upb_Array_Get(intern->array, index);
Convert_UpbToPhp(msgval, &ret, intern->type, &intern->arena);
RETURN_COPY_VALUE(&ret);
}
@ -356,11 +356,11 @@ PHP_METHOD(RepeatedField, offsetGet) {
*/
PHP_METHOD(RepeatedField, offsetSet) {
RepeatedField *intern = (RepeatedField*)Z_OBJ_P(getThis());
upb_arena *arena = Arena_Get(&intern->arena);
size_t size = upb_array_size(intern->array);
upb_Arena *arena = Arena_Get(&intern->arena);
size_t size = upb_Array_Size(intern->array);
zval *offset, *val;
int64_t index;
upb_msgval msgval;
upb_MessageValue msgval;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "zz", &offset, &val) != SUCCESS) {
return;
@ -379,9 +379,9 @@ PHP_METHOD(RepeatedField, offsetSet) {
if (index > size) {
zend_error(E_USER_ERROR, "Element at index %ld doesn't exist.\n", index);
} else if (index == size) {
upb_array_append(intern->array, msgval, Arena_Get(&intern->arena));
upb_Array_Append(intern->array, msgval, Arena_Get(&intern->arena));
} else {
upb_array_set(intern->array, index, msgval);
upb_Array_Set(intern->array, index, msgval);
}
}
@ -399,7 +399,7 @@ PHP_METHOD(RepeatedField, offsetSet) {
PHP_METHOD(RepeatedField, offsetUnset) {
RepeatedField *intern = (RepeatedField*)Z_OBJ_P(getThis());
zend_long index;
zend_long size = upb_array_size(intern->array);
zend_long size = upb_Array_Size(intern->array);
// Only the element at the end of the array can be removed.
if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &index) != SUCCESS) {
@ -412,7 +412,7 @@ PHP_METHOD(RepeatedField, offsetUnset) {
return;
}
upb_array_resize(intern->array, size - 1, Arena_Get(&intern->arena));
upb_Array_Resize(intern->array, size - 1, Arena_Get(&intern->arena));
}
/**
@ -432,7 +432,7 @@ PHP_METHOD(RepeatedField, count) {
return;
}
RETURN_LONG(upb_array_size(intern->array));
RETURN_LONG(upb_Array_Size(intern->array));
}
/**
@ -581,16 +581,16 @@ PHP_METHOD(RepeatedFieldIter, rewind) {
PHP_METHOD(RepeatedFieldIter, current) {
RepeatedFieldIter *intern = (RepeatedFieldIter*)Z_OBJ_P(getThis());
RepeatedField *field = (RepeatedField*)Z_OBJ_P(&intern->repeated_field);
upb_array *array = field->array;
upb_Array *array = field->array;
zend_long index = intern->position;
upb_msgval msgval;
upb_MessageValue msgval;
zval ret;
if (index < 0 || index >= upb_array_size(array)) {
if (index < 0 || index >= upb_Array_Size(array)) {
zend_error(E_USER_ERROR, "Element at %ld doesn't exist.\n", index);
}
msgval = upb_array_get(array, index);
msgval = upb_Array_Get(array, index);
Convert_UpbToPhp(msgval, &ret, field->type, &field->arena);
RETURN_COPY_VALUE(&ret);
@ -624,7 +624,7 @@ PHP_METHOD(RepeatedFieldIter, next) {
PHP_METHOD(RepeatedFieldIter, valid) {
RepeatedFieldIter *intern = (RepeatedFieldIter*)Z_OBJ_P(getThis());
RepeatedField *field = (RepeatedField*)Z_OBJ_P(&intern->repeated_field);
RETURN_BOOL(intern->position < upb_array_size(field->array));
RETURN_BOOL(intern->position < upb_Array_Size(field->array));
}
ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_current, 0, 0, IS_MIXED, 0)

View File

@ -39,29 +39,29 @@
// Registers PHP classes for RepeatedField.
void Array_ModuleInit();
// Gets a upb_array* for the PHP object |val|:
// Gets a upb_Array* for the PHP object |val|:
// * If |val| is a RepeatedField object, we first check its type and verify
// that the elements have the correct type for |type|. If so, we return
// the wrapped upb_array*. We also make sure that this array's arena is fused
// to |arena|, so the returned upb_array is guaranteed to live as long as
// the wrapped upb_Array*. We also make sure that this array's arena is fused
// to |arena|, so the returned upb_Array is guaranteed to live as long as
// |arena|.
// * If |val| is a PHP Array, we attempt to create a new upb_array using
// * If |val| is a PHP Array, we attempt to create a new upb_Array using
// |arena| and add all of the PHP elements to it.
//
// If an error occurs, we raise a PHP error and return NULL.
upb_array *RepeatedField_GetUpbArray(zval *val, TypeInfo type,
upb_arena *arena);
upb_Array *RepeatedField_GetUpbArray(zval *val, TypeInfo type,
upb_Arena *arena);
// Creates a PHP RepeatedField object for the given upb_array* and |type| and
// Creates a PHP RepeatedField object for the given upb_Array* and |type| and
// returns it in |val|. The PHP object will keep a reference to this |arena| to
// ensure the underlying array data stays alive.
//
// If |arr| is NULL, this will return a PHP null object.
void RepeatedField_GetPhpWrapper(zval *val, upb_array *arr, TypeInfo type,
void RepeatedField_GetPhpWrapper(zval *val, upb_Array *arr, TypeInfo type,
zval *arena);
// Returns true if the given arrays are equal. Both arrays must be of this
// |type| and, if the type is |UPB_TYPE_MESSAGE|, must have the same |m|.
bool ArrayEq(const upb_array *a1, const upb_array *a2, TypeInfo type);
// |type| and, if the type is |kUpb_CType_Message|, must have the same |m|.
bool ArrayEq(const upb_Array *a1, const upb_Array *a2, TypeInfo type);
#endif // PHP_PROTOBUF_ARRAY_H_

View File

@ -4,7 +4,7 @@ if test "$PHP_PROTOBUF" != "no"; then
PHP_NEW_EXTENSION(
protobuf,
arena.c array.c convert.c def.c map.c message.c names.c php-upb.c protobuf.c,
arena.c array.c convert.c def.c map.c message.c names.c php-upb.c protobuf.c third_party/utf8_range/naive.c third_party/utf8_range/range2-neon.c third_party/utf8_range/range2-sse.c,
$ext_shared, , -std=gnu99)
fi

View File

@ -153,30 +153,30 @@ static zend_function_entry util_methods[] = {
// Conversion functions used from C
// -----------------------------------------------------------------------------
upb_fieldtype_t pbphp_dtype_to_type(upb_descriptortype_t type) {
upb_CType pbphp_dtype_to_type(upb_FieldType type) {
switch (type) {
#define CASE(descriptor_type, type) \
case UPB_DESCRIPTOR_TYPE_##descriptor_type: \
return UPB_TYPE_##type;
case kUpb_FieldType_##descriptor_type: \
return kUpb_CType_##type;
CASE(FLOAT, FLOAT);
CASE(DOUBLE, DOUBLE);
CASE(BOOL, BOOL);
CASE(STRING, STRING);
CASE(BYTES, BYTES);
CASE(MESSAGE, MESSAGE);
CASE(GROUP, MESSAGE);
CASE(ENUM, ENUM);
CASE(INT32, INT32);
CASE(INT64, INT64);
CASE(UINT32, UINT32);
CASE(UINT64, UINT64);
CASE(SINT32, INT32);
CASE(SINT64, INT64);
CASE(FIXED32, UINT32);
CASE(FIXED64, UINT64);
CASE(SFIXED32, INT32);
CASE(SFIXED64, INT64);
CASE(Float, Float);
CASE(Double, Double);
CASE(Bool, Bool);
CASE(String, String);
CASE(Bytes, Bytes);
CASE(Message, Message);
CASE(Group, Message);
CASE(Enum, Enum);
CASE(Int32, Int32);
CASE(Int64, Int64);
// uint32 must stay unsigned, consistent with Fixed32 and with to_fieldtype().
CASE(UInt32, UInt32);
CASE(UInt64, UInt64);
CASE(SInt32, Int32);
CASE(SInt64, Int64);
CASE(Fixed32, UInt32);
CASE(Fixed64, UInt64);
CASE(SFixed32, Int32);
CASE(SFixed64, Int64);
#undef CASE
@ -353,8 +353,8 @@ static bool to_string(zval* from) {
}
}
bool Convert_PhpToUpb(zval *php_val, upb_msgval *upb_val, TypeInfo type,
upb_arena *arena) {
bool Convert_PhpToUpb(zval *php_val, upb_MessageValue *upb_val, TypeInfo type,
upb_Arena *arena) {
int64_t i64;
if (Z_ISREF_P(php_val)) {
@ -362,37 +362,37 @@ bool Convert_PhpToUpb(zval *php_val, upb_msgval *upb_val, TypeInfo type,
}
switch (type.type) {
case UPB_TYPE_INT64:
case kUpb_CType_Int64:
return Convert_PhpToInt64(php_val, &upb_val->int64_val);
case UPB_TYPE_INT32:
case UPB_TYPE_ENUM:
case kUpb_CType_Int32:
case kUpb_CType_Enum:
if (!Convert_PhpToInt64(php_val, &i64)) {
return false;
}
upb_val->int32_val = i64;
return true;
case UPB_TYPE_UINT64:
case kUpb_CType_UInt64:
if (!Convert_PhpToInt64(php_val, &i64)) {
return false;
}
upb_val->uint64_val = i64;
return true;
case UPB_TYPE_UINT32:
case kUpb_CType_UInt32:
if (!Convert_PhpToInt64(php_val, &i64)) {
return false;
}
upb_val->uint32_val = i64;
return true;
case UPB_TYPE_DOUBLE:
case kUpb_CType_Double:
return to_double(php_val, &upb_val->double_val);
case UPB_TYPE_FLOAT:
case kUpb_CType_Float:
if (!to_double(php_val, &upb_val->double_val)) return false;
upb_val->float_val = upb_val->double_val;
return true;
case UPB_TYPE_BOOL:
case kUpb_CType_Bool:
return to_bool(php_val, &upb_val->bool_val);
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES: {
case kUpb_CType_String:
case kUpb_CType_Bytes: {
char *ptr;
size_t size;
@ -401,30 +401,30 @@ bool Convert_PhpToUpb(zval *php_val, upb_msgval *upb_val, TypeInfo type,
size = Z_STRLEN_P(php_val);
// If arena is NULL we reference the input zval.
// The resulting upb_strview will only be value while the zval is alive.
// The resulting upb_StringView will only be valid while the zval is alive.
if (arena) {
ptr = upb_arena_malloc(arena, size);
ptr = upb_Arena_Malloc(arena, size);
memcpy(ptr, Z_STRVAL_P(php_val), size);
} else {
ptr = Z_STRVAL_P(php_val);
}
upb_val->str_val = upb_strview_make(ptr, size);
upb_val->str_val = upb_StringView_FromDataAndSize(ptr, size);
return true;
}
case UPB_TYPE_MESSAGE:
case kUpb_CType_Message:
PBPHP_ASSERT(type.desc);
return Message_GetUpbMessage(php_val, type.desc, arena,
(upb_msg **)&upb_val->msg_val);
(upb_Message **)&upb_val->msg_val);
}
return false;
}
void Convert_UpbToPhp(upb_msgval upb_val, zval *php_val, TypeInfo type,
void Convert_UpbToPhp(upb_MessageValue upb_val, zval *php_val, TypeInfo type,
zval *arena) {
switch (type.type) {
case UPB_TYPE_INT64:
case kUpb_CType_Int64:
#if SIZEOF_ZEND_LONG == 8
ZVAL_LONG(php_val, upb_val.int64_val);
#else
@ -435,7 +435,7 @@ void Convert_UpbToPhp(upb_msgval upb_val, zval *php_val, TypeInfo type,
}
#endif
break;
case UPB_TYPE_UINT64:
case kUpb_CType_UInt64:
#if SIZEOF_ZEND_LONG == 8
ZVAL_LONG(php_val, upb_val.uint64_val);
#else
@ -446,51 +446,70 @@ void Convert_UpbToPhp(upb_msgval upb_val, zval *php_val, TypeInfo type,
}
#endif
break;
case UPB_TYPE_INT32:
case UPB_TYPE_ENUM:
case kUpb_CType_Int32:
case kUpb_CType_Enum:
ZVAL_LONG(php_val, upb_val.int32_val);
break;
case UPB_TYPE_UINT32: {
case kUpb_CType_UInt32: {
// Sign-extend for consistency between 32/64-bit builds.
zend_long val = (int32_t)upb_val.uint32_val;
ZVAL_LONG(php_val, val);
break;
}
case UPB_TYPE_DOUBLE:
case kUpb_CType_Double:
ZVAL_DOUBLE(php_val, upb_val.double_val);
break;
case UPB_TYPE_FLOAT:
case kUpb_CType_Float:
ZVAL_DOUBLE(php_val, upb_val.float_val);
break;
case UPB_TYPE_BOOL:
case kUpb_CType_Bool:
ZVAL_BOOL(php_val, upb_val.bool_val);
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES: {
upb_strview str = upb_val.str_val;
case kUpb_CType_String:
case kUpb_CType_Bytes: {
upb_StringView str = upb_val.str_val;
ZVAL_NEW_STR(php_val, zend_string_init(str.data, str.size, 0));
break;
}
case UPB_TYPE_MESSAGE:
case kUpb_CType_Message:
PBPHP_ASSERT(type.desc);
Message_GetPhpWrapper(php_val, type.desc, (upb_msg *)upb_val.msg_val,
Message_GetPhpWrapper(php_val, type.desc, (upb_Message *)upb_val.msg_val,
arena);
break;
}
}
bool Convert_PhpToUpbAutoWrap(zval *val, upb_msgval *upb_val, TypeInfo type,
upb_arena *arena) {
const upb_msgdef *subm = type.desc ? type.desc->msgdef : NULL;
if (subm && upb_msgdef_iswrapper(subm) && Z_TYPE_P(val) != IS_OBJECT) {
// Reports whether the message def |m| is one of the well-known wrapper
// message types (DoubleValue, FloatValue, Int64Value, UInt64Value, Int32Value,
// UInt32Value, StringValue, BytesValue, BoolValue). A NULL |m| is never a
// wrapper.
static bool IsWrapper(const upb_MessageDef* m) {
  bool is_wrapper = false;

  if (m) {
    switch (upb_MessageDef_WellKnownType(m)) {
      case kUpb_WellKnown_DoubleValue:
      case kUpb_WellKnown_FloatValue:
      case kUpb_WellKnown_Int64Value:
      case kUpb_WellKnown_UInt64Value:
      case kUpb_WellKnown_Int32Value:
      case kUpb_WellKnown_UInt32Value:
      case kUpb_WellKnown_StringValue:
      case kUpb_WellKnown_BytesValue:
      case kUpb_WellKnown_BoolValue:
        is_wrapper = true;
        break;
      default:
        // Every other message type is not a wrapper.
        break;
    }
  }

  return is_wrapper;
}
bool Convert_PhpToUpbAutoWrap(zval *val, upb_MessageValue *upb_val, TypeInfo type,
upb_Arena *arena) {
const upb_MessageDef *subm = type.desc ? type.desc->msgdef : NULL;
if (subm && IsWrapper(subm) && Z_TYPE_P(val) != IS_OBJECT) {
// Assigning a scalar to a wrapper-typed value. We will automatically wrap
// the value, so the user doesn't need to create a FooWrapper(['value': X])
// message manually.
upb_msg *wrapper = upb_msg_new(subm, arena);
const upb_fielddef *val_f = upb_msgdef_itof(subm, 1);
upb_msgval msgval;
upb_Message *wrapper = upb_Message_New(subm, arena);
const upb_FieldDef *val_f = upb_MessageDef_FindFieldByNumber(subm, 1);
upb_MessageValue msgval;
if (!Convert_PhpToUpb(val, &msgval, TypeInfo_Get(val_f), arena)) return false;
upb_msg_set(wrapper, val_f, msgval, arena);
upb_Message_Set(wrapper, val_f, msgval, arena);
upb_val->msg_val = wrapper;
return true;
} else {

View File

@ -36,18 +36,18 @@
#include "php-upb.h"
#include "def.h"
upb_fieldtype_t pbphp_dtype_to_type(upb_descriptortype_t type);
upb_CType pbphp_dtype_to_type(upb_FieldType type);
// Converts |php_val| to an int64_t. Returns false if the value cannot be
// converted.
bool Convert_PhpToInt64(const zval *php_val, int64_t *i64);
// Converts |php_val| to a upb_msgval according to |type|. If type is
// UPB_TYPE_MESSAGE, then |desc| must be the Descriptor for this message type.
// Converts |php_val| to a upb_MessageValue according to |type|. If type is
// kUpb_CType_Message, then |desc| must be the Descriptor for this message type.
// If type is string, message, or bytes, then |arena| will be used to copy
// string data or fuse this arena to the given message's arena.
bool Convert_PhpToUpb(zval *php_val, upb_msgval *upb_val, TypeInfo type,
upb_arena *arena);
bool Convert_PhpToUpb(zval *php_val, upb_MessageValue *upb_val, TypeInfo type,
upb_Arena *arena);
// Similar to Convert_PhpToUpb, but supports automatically wrapping the wrapper
// types if a primitive is specified:
@ -56,15 +56,15 @@ bool Convert_PhpToUpb(zval *php_val, upb_msgval *upb_val, TypeInfo type,
//
// We currently allow this implicit conversion in initializers, but not for
// assignment.
bool Convert_PhpToUpbAutoWrap(zval *val, upb_msgval *upb_val, TypeInfo type,
upb_arena *arena);
bool Convert_PhpToUpbAutoWrap(zval *val, upb_MessageValue *upb_val, TypeInfo type,
upb_Arena *arena);
// Converts |upb_val| to a PHP zval according to |type|. This may involve
// creating a PHP wrapper object. Any newly created wrapper object
// will reference |arena|.
//
// The caller owns a reference to the returned value.
void Convert_UpbToPhp(upb_msgval upb_val, zval *php_val, TypeInfo type,
void Convert_UpbToPhp(upb_MessageValue upb_val, zval *php_val, TypeInfo type,
zval *arena);
// Registers the GPBUtil class.

View File

@ -39,13 +39,13 @@
#include "php-upb.h"
#include "protobuf.h"
static void CheckUpbStatus(const upb_status* status, const char* msg) {
if (!upb_ok(status)) {
zend_error(E_ERROR, "%s: %s\n", msg, upb_status_errmsg(status));
static void CheckUpbStatus(const upb_Status* status, const char* msg) {
if (!upb_Status_IsOk(status)) {
zend_error(E_ERROR, "%s: %s\n", msg, upb_Status_ErrorMessage(status));
}
}
static void FieldDescriptor_FromFieldDef(zval *val, const upb_fielddef *f);
static void FieldDescriptor_FromFieldDef(zval *val, const upb_FieldDef *f);
// We use this for objects that should not be created directly from PHP.
static zend_object *CreateHandler_ReturnNull(zend_class_entry *class_type) {
@ -117,7 +117,7 @@ static zend_function_entry EnumValueDescriptor_methods[] = {
typedef struct {
zend_object std;
const upb_enumdef *enumdef;
const upb_EnumDef *enumdef;
void *cache_key;
} EnumDescriptor;
@ -141,7 +141,7 @@ static void EnumDescriptor_FromClassEntry(zval *val, zend_class_entry *ce) {
}
if (!ObjCache_Get(key, val)) {
const upb_enumdef *e = NameMap_GetEnum(ce);
const upb_EnumDef *e = NameMap_GetEnum(ce);
if (!e) {
ZVAL_NULL(val);
return;
@ -157,12 +157,12 @@ static void EnumDescriptor_FromClassEntry(zval *val, zend_class_entry *ce) {
}
// Caller owns a ref on the returned zval.
static void EnumDescriptor_FromEnumDef(zval *val, const upb_enumdef *m) {
static void EnumDescriptor_FromEnumDef(zval *val, const upb_EnumDef *m) {
if (!m) {
ZVAL_NULL(val);
} else {
char *classname =
GetPhpClassname(upb_enumdef_file(m), upb_enumdef_fullname(m));
GetPhpClassname(upb_EnumDef_File(m), upb_EnumDef_FullName(m));
zend_string *str = zend_string_init(classname, strlen(classname), 0);
zend_class_entry *ce = zend_lookup_class(str); // May autoload the class.
@ -193,20 +193,14 @@ PHP_METHOD(EnumDescriptor, getValue) {
return;
}
int field_num = upb_enumdef_numvals(intern->enumdef);
if (index < 0 || index >= field_num) {
if (index < 0 || index >= upb_EnumDef_ValueCount(intern->enumdef)) {
zend_error(E_USER_ERROR, "Cannot get element at %ld.\n", index);
return;
}
upb_enum_iter iter;
int i;
for(upb_enum_begin(&iter, intern->enumdef), i = 0;
!upb_enum_done(&iter) && i < index;
upb_enum_next(&iter), i++);
EnumValueDescriptor_Make(&ret, upb_enum_iter_name(&iter),
upb_enum_iter_number(&iter));
const upb_EnumValueDef* ev = upb_EnumDef_Value(intern->enumdef, index);
EnumValueDescriptor_Make(&ret, upb_EnumValueDef_Name(ev),
upb_EnumValueDef_Number(ev));
RETURN_COPY_VALUE(&ret);
}
@ -217,7 +211,7 @@ PHP_METHOD(EnumDescriptor, getValue) {
*/
PHP_METHOD(EnumDescriptor, getValueCount) {
EnumDescriptor *intern = (EnumDescriptor*)Z_OBJ_P(getThis());
RETURN_LONG(upb_enumdef_numvals(intern->enumdef));
RETURN_LONG(upb_EnumDef_ValueCount(intern->enumdef));
}
/*
@ -244,7 +238,7 @@ static zend_function_entry EnumDescriptor_methods[] = {
typedef struct {
zend_object std;
const upb_oneofdef *oneofdef;
const upb_OneofDef *oneofdef;
} OneofDescriptor;
zend_class_entry *OneofDescriptor_class_entry;
@ -255,7 +249,7 @@ static void OneofDescriptor_destructor(zend_object* obj) {
ObjCache_Delete(intern->oneofdef);
}
static void OneofDescriptor_FromOneofDef(zval *val, const upb_oneofdef *o) {
static void OneofDescriptor_FromOneofDef(zval *val, const upb_OneofDef *o) {
if (o == NULL) {
ZVAL_NULL(val);
return;
@ -278,7 +272,7 @@ static void OneofDescriptor_FromOneofDef(zval *val, const upb_oneofdef *o) {
*/
PHP_METHOD(OneofDescriptor, getName) {
OneofDescriptor *intern = (OneofDescriptor*)Z_OBJ_P(getThis());
RETURN_STRING(upb_oneofdef_name(intern->oneofdef));
RETURN_STRING(upb_OneofDef_Name(intern->oneofdef));
}
/*
@ -297,19 +291,12 @@ PHP_METHOD(OneofDescriptor, getField) {
return;
}
int field_num = upb_oneofdef_numfields(intern->oneofdef);
if (index < 0 || index >= field_num) {
if (index < 0 || index >= upb_OneofDef_FieldCount(intern->oneofdef)) {
zend_error(E_USER_ERROR, "Cannot get element at %ld.\n", index);
return;
}
upb_oneof_iter iter;
int i;
for(upb_oneof_begin(&iter, intern->oneofdef), i = 0;
!upb_oneof_done(&iter) && i < index;
upb_oneof_next(&iter), i++);
const upb_fielddef *field = upb_oneof_iter_field(&iter);
const upb_FieldDef* field = upb_OneofDef_Field(intern->oneofdef, index);
FieldDescriptor_FromFieldDef(&ret, field);
RETURN_COPY_VALUE(&ret);
}
@ -321,7 +308,7 @@ PHP_METHOD(OneofDescriptor, getField) {
*/
PHP_METHOD(OneofDescriptor, getFieldCount) {
OneofDescriptor *intern = (OneofDescriptor*)Z_OBJ_P(getThis());
RETURN_LONG(upb_oneofdef_numfields(intern->oneofdef));
RETURN_LONG(upb_OneofDef_FieldCount(intern->oneofdef));
}
static zend_function_entry OneofDescriptor_methods[] = {
@ -337,7 +324,7 @@ static zend_function_entry OneofDescriptor_methods[] = {
typedef struct {
zend_object std;
const upb_fielddef *fielddef;
const upb_FieldDef *fielddef;
} FieldDescriptor;
zend_class_entry *FieldDescriptor_class_entry;
@ -349,7 +336,7 @@ static void FieldDescriptor_destructor(zend_object* obj) {
}
// Caller owns a ref on the returned zval.
static void FieldDescriptor_FromFieldDef(zval *val, const upb_fielddef *f) {
static void FieldDescriptor_FromFieldDef(zval *val, const upb_FieldDef *f) {
if (f == NULL) {
ZVAL_NULL(val);
return;
@ -365,30 +352,30 @@ static void FieldDescriptor_FromFieldDef(zval *val, const upb_fielddef *f) {
}
}
upb_fieldtype_t to_fieldtype(upb_descriptortype_t type) {
upb_CType to_fieldtype(upb_FieldType type) {
switch (type) {
#define CASE(descriptor_type, type) \
case UPB_DESCRIPTOR_TYPE_##descriptor_type: \
return UPB_TYPE_##type;
case kUpb_FieldType_##descriptor_type: \
return kUpb_CType_##type;
CASE(FLOAT, FLOAT);
CASE(DOUBLE, DOUBLE);
CASE(BOOL, BOOL);
CASE(STRING, STRING);
CASE(BYTES, BYTES);
CASE(MESSAGE, MESSAGE);
CASE(GROUP, MESSAGE);
CASE(ENUM, ENUM);
CASE(INT32, INT32);
CASE(INT64, INT64);
CASE(UINT32, UINT32);
CASE(UINT64, UINT64);
CASE(SINT32, INT32);
CASE(SINT64, INT64);
CASE(FIXED32, UINT32);
CASE(FIXED64, UINT64);
CASE(SFIXED32, INT32);
CASE(SFIXED64, INT64);
CASE(Float, Float);
CASE(Double, Double);
CASE(Bool, Bool);
CASE(String, String);
CASE(Bytes, Bytes);
CASE(Message, Message);
CASE(Group, Message);
CASE(Enum, Enum);
CASE(Int32, Int32);
CASE(Int64, Int64);
CASE(UInt32, UInt32);
CASE(UInt64, UInt64);
CASE(SInt32, Int32);
CASE(SInt64, Int64);
CASE(Fixed32, UInt32);
CASE(Fixed64, UInt64);
CASE(SFixed32, Int32);
CASE(SFixed64, Int64);
// Retire the CASE helper defined for this switch so it does not leak into the
// rest of the file (the previous `#undef CONVERT` named a macro that is never
// defined).
#undef CASE
@ -405,7 +392,7 @@ upb_fieldtype_t to_fieldtype(upb_descriptortype_t type) {
*/
PHP_METHOD(FieldDescriptor, getName) {
FieldDescriptor *intern = (FieldDescriptor*)Z_OBJ_P(getThis());
RETURN_STRING(upb_fielddef_name(intern->fielddef));
RETURN_STRING(upb_FieldDef_Name(intern->fielddef));
}
/*
@ -415,7 +402,7 @@ PHP_METHOD(FieldDescriptor, getName) {
*/
PHP_METHOD(FieldDescriptor, getNumber) {
FieldDescriptor *intern = (FieldDescriptor*)Z_OBJ_P(getThis());
RETURN_LONG(upb_fielddef_number(intern->fielddef));
RETURN_LONG(upb_FieldDef_Number(intern->fielddef));
}
/*
@ -425,7 +412,7 @@ PHP_METHOD(FieldDescriptor, getNumber) {
*/
PHP_METHOD(FieldDescriptor, getLabel) {
FieldDescriptor *intern = (FieldDescriptor*)Z_OBJ_P(getThis());
RETURN_LONG(upb_fielddef_label(intern->fielddef));
RETURN_LONG(upb_FieldDef_Label(intern->fielddef));
}
/*
@ -435,7 +422,7 @@ PHP_METHOD(FieldDescriptor, getLabel) {
*/
PHP_METHOD(FieldDescriptor, getType) {
FieldDescriptor *intern = (FieldDescriptor*)Z_OBJ_P(getThis());
RETURN_LONG(upb_fielddef_descriptortype(intern->fielddef));
RETURN_LONG(upb_FieldDef_Type(intern->fielddef));
}
/*
@ -445,7 +432,7 @@ PHP_METHOD(FieldDescriptor, getType) {
*/
PHP_METHOD(FieldDescriptor, isMap) {
FieldDescriptor *intern = (FieldDescriptor*)Z_OBJ_P(getThis());
RETURN_BOOL(upb_fielddef_ismap(intern->fielddef));
RETURN_BOOL(upb_FieldDef_IsMap(intern->fielddef));
}
/*
@ -455,13 +442,13 @@ PHP_METHOD(FieldDescriptor, isMap) {
*/
PHP_METHOD(FieldDescriptor, getEnumType) {
FieldDescriptor *intern = (FieldDescriptor*)Z_OBJ_P(getThis());
const upb_enumdef *e = upb_fielddef_enumsubdef(intern->fielddef);
const upb_EnumDef *e = upb_FieldDef_EnumSubDef(intern->fielddef);
zval ret;
if (!e) {
zend_throw_exception_ex(NULL, 0,
"Cannot get enum type for non-enum field '%s'",
upb_fielddef_name(intern->fielddef));
upb_FieldDef_Name(intern->fielddef));
return;
}
@ -481,7 +468,7 @@ PHP_METHOD(FieldDescriptor, getMessageType) {
if (!desc) {
zend_throw_exception_ex(
NULL, 0, "Cannot get message type for non-message field '%s'",
upb_fielddef_name(intern->fielddef));
upb_FieldDef_Name(intern->fielddef));
return;
}
@ -511,9 +498,9 @@ static void Descriptor_destructor(zend_object* obj) {
// collected before the end of the request.
}
static zend_class_entry *Descriptor_GetGeneratedClass(const upb_msgdef *m) {
static zend_class_entry *Descriptor_GetGeneratedClass(const upb_MessageDef *m) {
char *classname =
GetPhpClassname(upb_msgdef_file(m), upb_msgdef_fullname(m));
GetPhpClassname(upb_MessageDef_File(m), upb_MessageDef_FullName(m));
zend_string *str = zend_string_init(classname, strlen(classname), 0);
zend_class_entry *ce = zend_lookup_class(str); // May autoload the class.
@ -527,7 +514,7 @@ static zend_class_entry *Descriptor_GetGeneratedClass(const upb_msgdef *m) {
return ce;
}
void Descriptor_FromMessageDef(zval *val, const upb_msgdef *m) {
void Descriptor_FromMessageDef(zval *val, const upb_MessageDef *m) {
if (m == NULL) {
ZVAL_NULL(val);
return;
@ -535,7 +522,7 @@ void Descriptor_FromMessageDef(zval *val, const upb_msgdef *m) {
if (!ObjCache_Get(m, val)) {
zend_class_entry *ce = NULL;
if (!upb_msgdef_mapentry(m)) { // Map entries don't have a class.
if (!upb_MessageDef_IsMapEntry(m)) { // Map entries don't have a class.
ce = Descriptor_GetGeneratedClass(m);
if (!ce) {
ZVAL_NULL(val);
@ -581,14 +568,14 @@ Descriptor* Descriptor_GetFromClassEntry(zend_class_entry *ce) {
return Descriptor_GetFromZval(&desc);
}
Descriptor* Descriptor_GetFromMessageDef(const upb_msgdef *m) {
Descriptor* Descriptor_GetFromMessageDef(const upb_MessageDef *m) {
zval desc;
Descriptor_FromMessageDef(&desc, m);
return Descriptor_GetFromZval(&desc);
}
Descriptor* Descriptor_GetFromFieldDef(const upb_fielddef *f) {
return Descriptor_GetFromMessageDef(upb_fielddef_msgsubdef(f));
Descriptor* Descriptor_GetFromFieldDef(const upb_FieldDef *f) {
return Descriptor_GetFromMessageDef(upb_FieldDef_MessageSubDef(f));
}
/*
@ -609,7 +596,7 @@ PHP_METHOD(Descriptor, getPublicDescriptor) {
*/
PHP_METHOD(Descriptor, getFullName) {
Descriptor *intern = (Descriptor*)Z_OBJ_P(getThis());
RETURN_STRING(upb_msgdef_fullname(intern->msgdef));
RETURN_STRING(upb_MessageDef_FullName(intern->msgdef));
}
/*
@ -620,7 +607,7 @@ PHP_METHOD(Descriptor, getFullName) {
*/
PHP_METHOD(Descriptor, getField) {
Descriptor *intern = (Descriptor*)Z_OBJ_P(getThis());
int count = upb_msgdef_numfields(intern->msgdef);
int count = upb_MessageDef_FieldCount(intern->msgdef);
zval ret;
zend_long index;
@ -634,7 +621,7 @@ PHP_METHOD(Descriptor, getField) {
return;
}
FieldDescriptor_FromFieldDef(&ret, upb_msgdef_field(intern->msgdef, index));
FieldDescriptor_FromFieldDef(&ret, upb_MessageDef_Field(intern->msgdef, index));
RETURN_COPY_VALUE(&ret);
}
@ -645,7 +632,7 @@ PHP_METHOD(Descriptor, getField) {
*/
PHP_METHOD(Descriptor, getFieldCount) {
Descriptor *intern = (Descriptor*)Z_OBJ_P(getThis());
RETURN_LONG(upb_msgdef_numfields(intern->msgdef));
RETURN_LONG(upb_MessageDef_FieldCount(intern->msgdef));
}
/*
@ -664,20 +651,12 @@ PHP_METHOD(Descriptor, getOneofDecl) {
return;
}
int field_num = upb_msgdef_numoneofs(intern->msgdef);
if (index < 0 || index >= field_num) {
if (index < 0 || index >= upb_MessageDef_OneofCount(intern->msgdef)) {
zend_error(E_USER_ERROR, "Cannot get element at %ld.\n", index);
return;
}
upb_msg_oneof_iter iter;
int i;
for(upb_msg_oneof_begin(&iter, intern->msgdef), i = 0;
!upb_msg_oneof_done(&iter) && i < index;
upb_msg_oneof_next(&iter), i++);
const upb_oneofdef *oneof = upb_msg_iter_oneof(&iter);
OneofDescriptor_FromOneofDef(&ret, oneof);
OneofDescriptor_FromOneofDef(&ret, upb_MessageDef_Oneof(intern->msgdef, index));
RETURN_COPY_VALUE(&ret);
}
@ -688,7 +667,7 @@ PHP_METHOD(Descriptor, getOneofDecl) {
*/
PHP_METHOD(Descriptor, getOneofDeclCount) {
Descriptor *intern = (Descriptor*)Z_OBJ_P(getThis());
RETURN_LONG(upb_msgdef_numoneofs(intern->msgdef));
RETURN_LONG(upb_MessageDef_OneofCount(intern->msgdef));
}
/*
@ -720,7 +699,7 @@ static zend_function_entry Descriptor_methods[] = {
typedef struct DescriptorPool {
zend_object std;
upb_symtab *symtab;
upb_DefPool *symtab;
} DescriptorPool;
zend_class_entry *DescriptorPool_class_entry;
@ -743,7 +722,7 @@ static void DescriptorPool_destructor(zend_object* obj) {
zend_object_std_dtor(&intern->std);
}
void DescriptorPool_CreateWithSymbolTable(zval *zv, upb_symtab *symtab) {
void DescriptorPool_CreateWithSymbolTable(zval *zv, upb_DefPool *symtab) {
DescriptorPool *intern = emalloc(sizeof(DescriptorPool));
zend_object_std_init(&intern->std, DescriptorPool_class_entry);
intern->std.handlers = &DescriptorPool_object_handlers;
@ -752,7 +731,7 @@ void DescriptorPool_CreateWithSymbolTable(zval *zv, upb_symtab *symtab) {
ZVAL_OBJ(zv, &intern->std);
}
upb_symtab *DescriptorPool_GetSymbolTable() {
upb_DefPool *DescriptorPool_GetSymbolTable() {
DescriptorPool *intern = GetPool(get_generated_pool());
return intern->symtab;
}
@ -836,7 +815,7 @@ PHP_METHOD(DescriptorPool, getDescriptorByProtoName) {
DescriptorPool *intern = GetPool(getThis());
char *protoname = NULL;
zend_long protoname_len;
const upb_msgdef *m;
const upb_MessageDef *m;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &protoname, &protoname_len) ==
FAILURE) {
@ -845,7 +824,7 @@ PHP_METHOD(DescriptorPool, getDescriptorByProtoName) {
if (*protoname == '.') protoname++;
m = upb_symtab_lookupmsg(intern->symtab, protoname);
m = upb_DefPool_FindMessageByName(intern->symtab, protoname);
if (m) {
RETURN_OBJ_COPY(&Descriptor_GetFromMessageDef(m)->std);
@ -860,13 +839,13 @@ PHP_METHOD(DescriptorPool, getDescriptorByProtoName) {
* Returns true if this FileDescriptorProto depends on descriptor.proto.
*/
bool depends_on_descriptor(const google_protobuf_FileDescriptorProto* file) {
const upb_strview *deps;
upb_strview name = upb_strview_makez("google/protobuf/descriptor.proto");
const upb_StringView *deps;
upb_StringView name = upb_StringView_FromString("google/protobuf/descriptor.proto");
size_t i, n;
deps = google_protobuf_FileDescriptorProto_dependency(file, &n);
for (i = 0; i < n; i++) {
if (upb_strview_eql(deps[i], name)) {
if (upb_StringView_IsEqual(deps[i], name)) {
return true;
}
}
@ -874,37 +853,48 @@ bool depends_on_descriptor(const google_protobuf_FileDescriptorProto* file) {
return false;
}
static void add_message_name_mappings(const upb_MessageDef *message) {
NameMap_AddMessage(message);
int msg_n = upb_MessageDef_NestedMessageCount(message);
for (int i = 0; i < msg_n; i++) {
add_message_name_mappings(upb_MessageDef_NestedMessage(message, i));
}
int enum_n = upb_MessageDef_NestedEnumCount(message);
for (int i = 0; i < enum_n; i++) {
NameMap_AddEnum(upb_MessageDef_NestedEnum(message, i));
}
}
/*
* add_name_mappings()
*
* Adds the messages and enums in this file to the NameMap.
*/
static void add_name_mappings(const upb_filedef *file) {
size_t i;
for (i = 0; i < upb_filedef_msgcount(file); i++) {
NameMap_AddMessage(upb_filedef_msg(file, i));
static void add_name_mappings(const upb_FileDef *file) {
for (int i = 0; i < upb_FileDef_TopLevelMessageCount(file); i++) {
add_message_name_mappings(upb_FileDef_TopLevelMessage(file, i));
}
for (i = 0; i < upb_filedef_enumcount(file); i++) {
NameMap_AddEnum(upb_filedef_enum(file, i));
for (int i = 0; i < upb_FileDef_TopLevelEnumCount(file); i++) {
NameMap_AddEnum(upb_FileDef_TopLevelEnum(file, i));
}
}
static void add_descriptor(DescriptorPool *pool,
const google_protobuf_FileDescriptorProto *file) {
upb_strview name = google_protobuf_FileDescriptorProto_name(file);
upb_status status;
const upb_filedef *file_def;
upb_status_clear(&status);
upb_StringView name = google_protobuf_FileDescriptorProto_name(file);
upb_Status status;
const upb_FileDef *file_def;
upb_Status_Clear(&status);
if (upb_symtab_lookupfile2(pool->symtab, name.data, name.size)) {
if (upb_DefPool_FindFileByNameWithSize(pool->symtab, name.data, name.size)) {
// Already added.
// TODO(teboring): Re-enable this warning when aggregate metadata is
// deprecated.
// zend_error(E_USER_WARNING,
// "proto descriptor was previously loaded (included in multiple "
// "metadata bundles?): " UPB_STRVIEW_FORMAT,
// UPB_STRVIEW_ARGS(name));
// "metadata bundles?): " UPB_STRINGVIEW_FORMAT,
// UPB_STRINGVIEW_ARGS(name));
return;
}
@ -915,7 +905,7 @@ static void add_descriptor(DescriptorPool *pool,
google_protobuf_FileDescriptorProto_getmsgdef(pool->symtab);
}
file_def = upb_symtab_addfile(pool->symtab, file, &status);
file_def = upb_DefPool_AddFile(pool->symtab, file, &status);
CheckUpbStatus(&status, "Unable to load descriptor");
add_name_mappings(file_def);
}
@ -926,7 +916,7 @@ static void add_descriptor(DescriptorPool *pool,
* Adds the given descriptor data to this DescriptorPool.
*/
static void add_descriptor_set(DescriptorPool *pool, const char *data,
int data_len, upb_arena *arena) {
int data_len, upb_Arena *arena) {
size_t i, n;
google_protobuf_FileDescriptorSet *set;
const google_protobuf_FileDescriptorProto* const* files;
@ -948,12 +938,12 @@ static void add_descriptor_set(DescriptorPool *pool, const char *data,
bool DescriptorPool_HasFile(const char *filename) {
DescriptorPool *intern = GetPool(get_generated_pool());
return upb_symtab_lookupfile(intern->symtab, filename) != NULL;
return upb_DefPool_FindFileByName(intern->symtab, filename) != NULL;
}
void DescriptorPool_AddDescriptor(const char *filename, const char *data,
int size) {
upb_arena *arena = upb_arena_new();
upb_Arena *arena = upb_Arena_New();
const google_protobuf_FileDescriptorProto *file =
google_protobuf_FileDescriptorProto_parse(data, size, arena);
@ -963,7 +953,7 @@ void DescriptorPool_AddDescriptor(const char *filename, const char *data,
}
add_descriptor(GetPool(get_generated_pool()), file);
upb_arena_free(arena);
upb_Arena_Free(arena);
}
/*
@ -976,16 +966,16 @@ PHP_METHOD(DescriptorPool, internalAddGeneratedFile) {
char *data = NULL;
zend_long data_len;
zend_bool use_nested_submsg = false;
upb_arena *arena;
upb_Arena *arena;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|b", &data, &data_len,
&use_nested_submsg) != SUCCESS) {
return;
}
arena = upb_arena_new();
arena = upb_Arena_New();
add_descriptor_set(intern, data, data_len, arena);
upb_arena_free(arena);
upb_Arena_Free(arena);
}
ZEND_BEGIN_ARG_INFO_EX(arginfo_lookupByName, 0, 0, 1)

View File

@ -40,9 +40,9 @@ void Def_ModuleInit();
// Creates a new DescriptorPool to wrap the given symtab, which must not be
// NULL.
void DescriptorPool_CreateWithSymbolTable(zval *zv, upb_symtab *symtab);
void DescriptorPool_CreateWithSymbolTable(zval *zv, upb_DefPool *symtab);
upb_symtab *DescriptorPool_GetSymbolTable();
upb_DefPool *DescriptorPool_GetSymbolTable();
// Returns true if the global descriptor pool already has the given filename.
bool DescriptorPool_HasFile(const char *filename);
@ -52,38 +52,38 @@ void DescriptorPool_AddDescriptor(const char *filename, const char *data, int si
typedef struct Descriptor {
zend_object std;
const upb_msgdef *msgdef;
const upb_MessageDef *msgdef;
zend_class_entry *class_entry;
} Descriptor;
// Gets or creates a Descriptor* for the given class entry, upb_msgdef, or
// upb_fielddef. The returned Descriptor* will live for the entire request,
// Gets or creates a Descriptor* for the given class entry, upb_MessageDef, or
// upb_FieldDef. The returned Descriptor* will live for the entire request,
// so no ref is necessary to keep it alive. The caller does *not* own a ref
// on the returned object.
Descriptor* Descriptor_GetFromClassEntry(zend_class_entry *ce);
Descriptor* Descriptor_GetFromMessageDef(const upb_msgdef *m);
Descriptor* Descriptor_GetFromFieldDef(const upb_fielddef *f);
Descriptor* Descriptor_GetFromMessageDef(const upb_MessageDef *m);
Descriptor* Descriptor_GetFromFieldDef(const upb_FieldDef *f);
// Packages up a upb_fieldtype_t with a Descriptor, since many functions need
// Packages up a upb_CType with a Descriptor, since many functions need
// both.
typedef struct {
upb_fieldtype_t type;
const Descriptor *desc; // When type == UPB_TYPE_MESSAGE.
upb_CType type;
const Descriptor *desc; // When type == kUpb_CType_Message.
} TypeInfo;
static inline TypeInfo TypeInfo_Get(const upb_fielddef *f) {
TypeInfo ret = {upb_fielddef_type(f), Descriptor_GetFromFieldDef(f)};
static inline TypeInfo TypeInfo_Get(const upb_FieldDef *f) {
TypeInfo ret = {upb_FieldDef_CType(f), Descriptor_GetFromFieldDef(f)};
return ret;
}
static inline TypeInfo TypeInfo_FromType(upb_fieldtype_t type) {
static inline TypeInfo TypeInfo_FromType(upb_CType type) {
TypeInfo ret = {type};
return ret;
}
static inline bool TypeInfo_Eq(TypeInfo a, TypeInfo b) {
if (a.type != b.type) return false;
if (a.type == UPB_TYPE_MESSAGE && a.desc != b.desc) return false;
if (a.type == kUpb_CType_Message && a.desc != b.desc) return false;
return true;
}

View File

@ -50,7 +50,7 @@ static void MapFieldIter_make(zval *val, zval *map_field);
typedef struct {
zend_object std;
zval arena;
upb_map *map;
upb_Map *map;
MapField_Type type;
} MapField;
@ -66,13 +66,13 @@ static TypeInfo KeyType(MapField_Type type) {
return ret;
}
MapField_Type MapType_Get(const upb_fielddef *f) {
const upb_msgdef *ent = upb_fielddef_msgsubdef(f);
const upb_fielddef *key_f = upb_msgdef_itof(ent, 1);
const upb_fielddef *val_f = upb_msgdef_itof(ent, 2);
MapField_Type MapType_Get(const upb_FieldDef *f) {
const upb_MessageDef *ent = upb_FieldDef_MessageSubDef(f);
const upb_FieldDef *key_f = upb_MessageDef_FindFieldByNumber(ent, 1);
const upb_FieldDef *val_f = upb_MessageDef_FindFieldByNumber(ent, 2);
MapField_Type type = {
upb_fielddef_type(key_f),
{upb_fielddef_type(val_f), Descriptor_GetFromFieldDef(val_f)}};
upb_FieldDef_CType(key_f),
{upb_FieldDef_CType(val_f), Descriptor_GetFromFieldDef(val_f)}};
return type;
}
@ -135,15 +135,15 @@ static int MapField_compare_objects(zval *map1, zval *map2) {
*/
static zend_object *MapField_clone_obj(PROTO_VAL *object) {
MapField* intern = PROTO_VAL_P(object);
upb_arena *arena = Arena_Get(&intern->arena);
upb_map *clone =
upb_map_new(arena, intern->type.key_type, intern->type.val_type.type);
size_t iter = UPB_MAP_BEGIN;
upb_Arena *arena = Arena_Get(&intern->arena);
upb_Map *clone =
upb_Map_New(arena, intern->type.key_type, intern->type.val_type.type);
size_t iter = kUpb_Map_Begin;
while (upb_mapiter_next(intern->map, &iter)) {
upb_msgval key = upb_mapiter_key(intern->map, iter);
upb_msgval val = upb_mapiter_value(intern->map, iter);
upb_map_set(clone, key, val, arena);
while (upb_MapIterator_Next(intern->map, &iter)) {
upb_MessageValue key = upb_MapIterator_Key(intern->map, iter);
upb_MessageValue val = upb_MapIterator_Value(intern->map, iter);
upb_Map_Set(clone, key, val, arena);
}
zval ret;
@ -164,7 +164,7 @@ static HashTable *Map_GetProperties(PROTO_VAL *object) {
// These are documented in the header file.
void MapField_GetPhpWrapper(zval *val, upb_map *map, MapField_Type type,
void MapField_GetPhpWrapper(zval *val, upb_Map *map, MapField_Type type,
zval *arena) {
if (!map) {
ZVAL_NULL(val);
@ -184,13 +184,13 @@ void MapField_GetPhpWrapper(zval *val, upb_map *map, MapField_Type type,
}
}
upb_map *MapField_GetUpbMap(zval *val, MapField_Type type, upb_arena *arena) {
upb_Map *MapField_GetUpbMap(zval *val, MapField_Type type, upb_Arena *arena) {
if (Z_ISREF_P(val)) {
ZVAL_DEREF(val);
}
if (Z_TYPE_P(val) == IS_ARRAY) {
upb_map *map = upb_map_new(arena, type.key_type, type.val_type.type);
upb_Map *map = upb_Map_New(arena, type.key_type, type.val_type.type);
HashTable *table = HASH_OF(val);
HashPosition pos;
@ -199,8 +199,8 @@ upb_map *MapField_GetUpbMap(zval *val, MapField_Type type, upb_arena *arena) {
while (true) {
zval php_key;
zval *php_val;
upb_msgval upb_key;
upb_msgval upb_val;
upb_MessageValue upb_key;
upb_MessageValue upb_val;
zend_hash_get_current_key_zval_ex(table, &php_key, &pos);
php_val = zend_hash_get_current_data_ex(table, &pos);
@ -212,7 +212,7 @@ upb_map *MapField_GetUpbMap(zval *val, MapField_Type type, upb_arena *arena) {
return NULL;
}
upb_map_set(map, upb_key, upb_val, arena);
upb_Map_Set(map, upb_key, upb_val, arena);
zend_hash_move_forward_ex(table, &pos);
zval_dtor(&php_key);
}
@ -225,7 +225,7 @@ upb_map *MapField_GetUpbMap(zval *val, MapField_Type type, upb_arena *arena) {
return NULL;
}
upb_arena_fuse(arena, Arena_Get(&intern->arena));
upb_Arena_Fuse(arena, Arena_Get(&intern->arena));
return intern->map;
} else {
php_error_docref(NULL, E_USER_ERROR, "Must be a map");
@ -233,19 +233,19 @@ upb_map *MapField_GetUpbMap(zval *val, MapField_Type type, upb_arena *arena) {
}
}
bool MapEq(const upb_map *m1, const upb_map *m2, MapField_Type type) {
size_t iter = UPB_MAP_BEGIN;
bool MapEq(const upb_Map *m1, const upb_Map *m2, MapField_Type type) {
size_t iter = kUpb_Map_Begin;
if ((m1 == NULL) != (m2 == NULL)) return false;
if (m1 == NULL) return true;
if (upb_map_size(m1) != upb_map_size(m2)) return false;
if (upb_Map_Size(m1) != upb_Map_Size(m2)) return false;
while (upb_mapiter_next(m1, &iter)) {
upb_msgval key = upb_mapiter_key(m1, iter);
upb_msgval val1 = upb_mapiter_value(m1, iter);
upb_msgval val2;
while (upb_MapIterator_Next(m1, &iter)) {
upb_MessageValue key = upb_MapIterator_Key(m1, iter);
upb_MessageValue val1 = upb_MapIterator_Value(m1, iter);
upb_MessageValue val2;
if (!upb_map_get(m2, key, &val2)) return false;
if (!upb_Map_Get(m2, key, &val2)) return false;
if (!ValueEq(val1, val2, type.val_type)) return false;
}
@ -265,7 +265,7 @@ bool MapEq(const upb_map *m1, const upb_map *m2, MapField_Type type) {
*/
PHP_METHOD(MapField, __construct) {
MapField *intern = (MapField*)Z_OBJ_P(getThis());
upb_arena *arena = Arena_Get(&intern->arena);
upb_Arena *arena = Arena_Get(&intern->arena);
zend_long key_type, val_type;
zend_class_entry* klass = NULL;
@ -280,27 +280,27 @@ PHP_METHOD(MapField, __construct) {
// Check that the key type is an allowed type.
switch (intern->type.key_type) {
case UPB_TYPE_INT32:
case UPB_TYPE_INT64:
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
case UPB_TYPE_BOOL:
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES:
case kUpb_CType_Int32:
case kUpb_CType_Int64:
case kUpb_CType_UInt32:
case kUpb_CType_UInt64:
case kUpb_CType_Bool:
case kUpb_CType_String:
case kUpb_CType_Bytes:
// These are OK.
break;
default:
zend_error(E_USER_ERROR, "Invalid key type for map.");
}
if (intern->type.val_type.type == UPB_TYPE_MESSAGE && klass == NULL) {
if (intern->type.val_type.type == kUpb_CType_Message && klass == NULL) {
php_error_docref(NULL, E_USER_ERROR,
"Message/enum type must have concrete class.");
return;
}
intern->map =
upb_map_new(arena, intern->type.key_type, intern->type.val_type.type);
upb_Map_New(arena, intern->type.key_type, intern->type.val_type.type);
ObjCache_Add(intern->map, &intern->std);
}
@ -318,14 +318,14 @@ PHP_METHOD(MapField, __construct) {
PHP_METHOD(MapField, offsetExists) {
MapField *intern = (MapField*)Z_OBJ_P(getThis());
zval *key;
upb_msgval upb_key;
upb_MessageValue upb_key;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &key) != SUCCESS ||
!Convert_PhpToUpb(key, &upb_key, KeyType(intern->type), NULL)) {
return;
}
RETURN_BOOL(upb_map_get(intern->map, upb_key, NULL));
RETURN_BOOL(upb_Map_Get(intern->map, upb_key, NULL));
}
/**
@ -344,14 +344,14 @@ PHP_METHOD(MapField, offsetGet) {
MapField *intern = (MapField*)Z_OBJ_P(getThis());
zval *key;
zval ret;
upb_msgval upb_key, upb_val;
upb_MessageValue upb_key, upb_val;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &key) != SUCCESS ||
!Convert_PhpToUpb(key, &upb_key, KeyType(intern->type), NULL)) {
return;
}
if (!upb_map_get(intern->map, upb_key, &upb_val)) {
if (!upb_Map_Get(intern->map, upb_key, &upb_val)) {
zend_error(E_USER_ERROR, "Given key doesn't exist.");
return;
}
@ -375,9 +375,9 @@ PHP_METHOD(MapField, offsetGet) {
*/
PHP_METHOD(MapField, offsetSet) {
MapField *intern = (MapField*)Z_OBJ_P(getThis());
upb_arena *arena = Arena_Get(&intern->arena);
upb_Arena *arena = Arena_Get(&intern->arena);
zval *key, *val;
upb_msgval upb_key, upb_val;
upb_MessageValue upb_key, upb_val;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "zz", &key, &val) != SUCCESS ||
!Convert_PhpToUpb(key, &upb_key, KeyType(intern->type), NULL) ||
@ -385,7 +385,7 @@ PHP_METHOD(MapField, offsetSet) {
return;
}
upb_map_set(intern->map, upb_key, upb_val, arena);
upb_Map_Set(intern->map, upb_key, upb_val, arena);
}
/**
@ -402,14 +402,14 @@ PHP_METHOD(MapField, offsetSet) {
PHP_METHOD(MapField, offsetUnset) {
MapField *intern = (MapField*)Z_OBJ_P(getThis());
zval *key;
upb_msgval upb_key;
upb_MessageValue upb_key;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &key) != SUCCESS ||
!Convert_PhpToUpb(key, &upb_key, KeyType(intern->type), NULL)) {
return;
}
upb_map_delete(intern->map, upb_key);
upb_Map_Delete(intern->map, upb_key);
}
/**
@ -429,7 +429,7 @@ PHP_METHOD(MapField, count) {
return;
}
RETURN_LONG(upb_map_size(intern->map));
RETURN_LONG(upb_Map_Size(intern->map));
}
/**
@ -569,8 +569,8 @@ static void MapFieldIter_make(zval *val, zval *map_field) {
PHP_METHOD(MapFieldIter, rewind) {
MapFieldIter *intern = (MapFieldIter*)Z_OBJ_P(getThis());
MapField *map_field = (MapField*)Z_OBJ_P(&intern->map_field);
intern->position = UPB_MAP_BEGIN;
upb_mapiter_next(map_field->map, &intern->position);
intern->position = kUpb_Map_Begin;
upb_MapIterator_Next(map_field->map, &intern->position);
}
/**
@ -581,7 +581,7 @@ PHP_METHOD(MapFieldIter, rewind) {
PHP_METHOD(MapFieldIter, current) {
MapFieldIter *intern = (MapFieldIter*)Z_OBJ_P(getThis());
MapField *field = (MapField*)Z_OBJ_P(&intern->map_field);
upb_msgval upb_val = upb_mapiter_value(field->map, intern->position);
upb_MessageValue upb_val = upb_MapIterator_Value(field->map, intern->position);
zval ret;
Convert_UpbToPhp(upb_val, &ret, field->type.val_type, &field->arena);
RETURN_COPY_VALUE(&ret);
@ -595,7 +595,7 @@ PHP_METHOD(MapFieldIter, current) {
PHP_METHOD(MapFieldIter, key) {
MapFieldIter *intern = (MapFieldIter*)Z_OBJ_P(getThis());
MapField *field = (MapField*)Z_OBJ_P(&intern->map_field);
upb_msgval upb_key = upb_mapiter_key(field->map, intern->position);
upb_MessageValue upb_key = upb_MapIterator_Key(field->map, intern->position);
zval ret;
Convert_UpbToPhp(upb_key, &ret, KeyType(field->type), NULL);
RETURN_COPY_VALUE(&ret);
@ -609,7 +609,7 @@ PHP_METHOD(MapFieldIter, key) {
PHP_METHOD(MapFieldIter, next) {
MapFieldIter *intern = (MapFieldIter*)Z_OBJ_P(getThis());
MapField *field = (MapField*)Z_OBJ_P(&intern->map_field);
upb_mapiter_next(field->map, &intern->position);
upb_MapIterator_Next(field->map, &intern->position);
}
/**
@ -620,7 +620,7 @@ PHP_METHOD(MapFieldIter, next) {
PHP_METHOD(MapFieldIter, valid) {
MapFieldIter *intern = (MapFieldIter*)Z_OBJ_P(getThis());
MapField *field = (MapField*)Z_OBJ_P(&intern->map_field);
bool done = upb_mapiter_done(field->map, intern->position);
bool done = upb_MapIterator_Done(field->map, intern->position);
RETURN_BOOL(!done);
}

View File

@ -39,32 +39,32 @@
void Map_ModuleInit();
typedef struct {
upb_fieldtype_t key_type;
upb_CType key_type;
TypeInfo val_type;
} MapField_Type;
MapField_Type MapType_Get(const upb_fielddef *f);
MapField_Type MapType_Get(const upb_FieldDef *f);
// Gets a upb_map* for the PHP object |val|:
// Gets a upb_Map* for the PHP object |val|:
// * If |val| is a MapField object, we first check its type and verify
// that the elements have the correct type for |type|. If so, we return the
// wrapped upb_map*. We also make sure that this map's arena is fused to
// |arena|, so the returned upb_map is guaranteed to live as long as
// wrapped upb_Map*. We also make sure that this map's arena is fused to
// |arena|, so the returned upb_Map is guaranteed to live as long as
// |arena|.
// * If |val| is a PHP Map, we attempt to create a new upb_map using
// * If |val| is a PHP Map, we attempt to create a new upb_Map using
// |arena| and add all of the PHP elements to it.
//
// If an error occurs, we raise a PHP error and return NULL.
upb_map *MapField_GetUpbMap(zval *val, MapField_Type type, upb_arena *arena);
upb_Map *MapField_GetUpbMap(zval *val, MapField_Type type, upb_Arena *arena);
// Creates a PHP MapField object for the given upb_map* and |f| and returns it
// Creates a PHP MapField object for the given upb_Map* and |f| and returns it
// in |val|. The PHP object will keep a reference to this |arena| to ensure the
// underlying map data stays alive.
//
// If |map| is NULL, this will return a PHP null object.
void MapField_GetPhpWrapper(zval *val, upb_map *arr, MapField_Type type,
void MapField_GetPhpWrapper(zval *val, upb_Map *arr, MapField_Type type,
zval *arena);
bool MapEq(const upb_map *m1, const upb_map *m2, MapField_Type type);
bool MapEq(const upb_Map *m1, const upb_Map *m2, MapField_Type type);
#endif // PHP_PROTOBUF_MAP_H_

View File

@ -54,7 +54,7 @@ typedef struct {
zend_object std;
zval arena;
const Descriptor* desc;
upb_msg *msg;
upb_Message *msg;
} Message;
zend_class_entry *message_ce;
@ -110,10 +110,10 @@ static void Message_dtor(zend_object* obj) {
*
* Helper function to look up a field given a member name (as a string).
*/
static const upb_fielddef *get_field(Message *msg, PROTO_STR *member) {
const upb_msgdef *m = msg->desc->msgdef;
const upb_fielddef *f =
upb_msgdef_ntof(m, PROTO_STRVAL_P(member), PROTO_STRLEN_P(member));
static const upb_FieldDef *get_field(Message *msg, PROTO_STR *member) {
const upb_MessageDef *m = msg->desc->msgdef;
const upb_FieldDef *f =
upb_MessageDef_FindFieldByNameWithSize(m, PROTO_STRVAL_P(member), PROTO_STRLEN_P(member));
if (!f) {
zend_throw_exception_ex(NULL, 0, "No such property %s.",
@ -123,72 +123,91 @@ static const upb_fielddef *get_field(Message *msg, PROTO_STR *member) {
return f;
}
static void Message_get(Message *intern, const upb_fielddef *f, zval *rv) {
upb_arena *arena = Arena_Get(&intern->arena);
// Check if the field is a well known wrapper type
static bool IsWrapper(const upb_MessageDef* m) {
if (!m) return false;
switch (upb_MessageDef_WellKnownType(m)) {
case kUpb_WellKnown_DoubleValue:
case kUpb_WellKnown_FloatValue:
case kUpb_WellKnown_Int64Value:
case kUpb_WellKnown_UInt64Value:
case kUpb_WellKnown_Int32Value:
case kUpb_WellKnown_UInt32Value:
case kUpb_WellKnown_StringValue:
case kUpb_WellKnown_BytesValue:
case kUpb_WellKnown_BoolValue:
return true;
default:
return false;
}
}
if (upb_fielddef_ismap(f)) {
upb_mutmsgval msgval = upb_msg_mutable(intern->msg, f, arena);
static void Message_get(Message *intern, const upb_FieldDef *f, zval *rv) {
upb_Arena *arena = Arena_Get(&intern->arena);
if (upb_FieldDef_IsMap(f)) {
upb_MutableMessageValue msgval = upb_Message_Mutable(intern->msg, f, arena);
MapField_GetPhpWrapper(rv, msgval.map, MapType_Get(f), &intern->arena);
} else if (upb_fielddef_isseq(f)) {
upb_mutmsgval msgval = upb_msg_mutable(intern->msg, f, arena);
} else if (upb_FieldDef_IsRepeated(f)) {
upb_MutableMessageValue msgval = upb_Message_Mutable(intern->msg, f, arena);
RepeatedField_GetPhpWrapper(rv, msgval.array, TypeInfo_Get(f),
&intern->arena);
} else {
if (upb_fielddef_issubmsg(f) && !upb_msg_has(intern->msg, f)) {
if (upb_FieldDef_IsSubMessage(f) && !upb_Message_Has(intern->msg, f)) {
ZVAL_NULL(rv);
return;
}
upb_msgval msgval = upb_msg_get(intern->msg, f);
upb_MessageValue msgval = upb_Message_Get(intern->msg, f);
Convert_UpbToPhp(msgval, rv, TypeInfo_Get(f), &intern->arena);
}
}
static bool Message_set(Message *intern, const upb_fielddef *f, zval *val) {
upb_arena *arena = Arena_Get(&intern->arena);
upb_msgval msgval;
static bool Message_set(Message *intern, const upb_FieldDef *f, zval *val) {
upb_Arena *arena = Arena_Get(&intern->arena);
upb_MessageValue msgval;
if (upb_fielddef_ismap(f)) {
if (upb_FieldDef_IsMap(f)) {
msgval.map_val = MapField_GetUpbMap(val, MapType_Get(f), arena);
if (!msgval.map_val) return false;
} else if (upb_fielddef_isseq(f)) {
} else if (upb_FieldDef_IsRepeated(f)) {
msgval.array_val = RepeatedField_GetUpbArray(val, TypeInfo_Get(f), arena);
if (!msgval.array_val) return false;
} else if (upb_fielddef_issubmsg(f) && Z_TYPE_P(val) == IS_NULL) {
upb_msg_clearfield(intern->msg, f);
} else if (upb_FieldDef_IsSubMessage(f) && Z_TYPE_P(val) == IS_NULL) {
upb_Message_ClearField(intern->msg, f);
return true;
} else {
if (!Convert_PhpToUpb(val, &msgval, TypeInfo_Get(f), arena)) return false;
}
upb_msg_set(intern->msg, f, msgval, arena);
upb_Message_Set(intern->msg, f, msgval, arena);
return true;
}
static bool MessageEq(const upb_msg *m1, const upb_msg *m2, const upb_msgdef *m);
static bool MessageEq(const upb_Message *m1, const upb_Message *m2, const upb_MessageDef *m);
/**
* ValueEq()
*/
bool ValueEq(upb_msgval val1, upb_msgval val2, TypeInfo type) {
bool ValueEq(upb_MessageValue val1, upb_MessageValue val2, TypeInfo type) {
switch (type.type) {
case UPB_TYPE_BOOL:
case kUpb_CType_Bool:
return val1.bool_val == val2.bool_val;
case UPB_TYPE_INT32:
case UPB_TYPE_UINT32:
case UPB_TYPE_ENUM:
case kUpb_CType_Int32:
case kUpb_CType_UInt32:
case kUpb_CType_Enum:
return val1.int32_val == val2.int32_val;
case UPB_TYPE_INT64:
case UPB_TYPE_UINT64:
case kUpb_CType_Int64:
case kUpb_CType_UInt64:
return val1.int64_val == val2.int64_val;
case UPB_TYPE_FLOAT:
case kUpb_CType_Float:
return val1.float_val == val2.float_val;
case UPB_TYPE_DOUBLE:
case kUpb_CType_Double:
return val1.double_val == val2.double_val;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES:
case kUpb_CType_String:
case kUpb_CType_Bytes:
return val1.str_val.size == val2.str_val.size &&
memcmp(val1.str_val.data, val2.str_val.data, val1.str_val.size) == 0;
case UPB_TYPE_MESSAGE:
case kUpb_CType_Message:
return MessageEq(val1.msg_val, val2.msg_val, type.desc->msgdef);
default:
return false;
@ -198,27 +217,25 @@ bool ValueEq(upb_msgval val1, upb_msgval val2, TypeInfo type) {
/**
* MessageEq()
*/
static bool MessageEq(const upb_msg *m1, const upb_msg *m2, const upb_msgdef *m) {
upb_msg_field_iter i;
static bool MessageEq(const upb_Message *m1, const upb_Message *m2, const upb_MessageDef *m) {
int n = upb_MessageDef_FieldCount(m);
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
for(int i = 0; i < n; i++) {
const upb_FieldDef *f = upb_MessageDef_Field(m, i);
if (upb_fielddef_haspresence(f)) {
if (upb_msg_has(m1, f) != upb_msg_has(m2, f)) {
if (upb_FieldDef_HasPresence(f)) {
if (upb_Message_Has(m1, f) != upb_Message_Has(m2, f)) {
return false;
}
if (!upb_msg_has(m1, f)) continue;
if (!upb_Message_Has(m1, f)) continue;
}
upb_msgval val1 = upb_msg_get(m1, f);
upb_msgval val2 = upb_msg_get(m2, f);
upb_MessageValue val1 = upb_Message_Get(m1, f);
upb_MessageValue val2 = upb_Message_Get(m2, f);
if (upb_fielddef_ismap(f)) {
if (upb_FieldDef_IsMap(f)) {
if (!MapEq(val1.map_val, val2.map_val, MapType_Get(f))) return false;
} else if (upb_fielddef_isseq(f)) {
} else if (upb_FieldDef_IsRepeated(f)) {
if (!ArrayEq(val1.array_val, val2.array_val, TypeInfo_Get(f))) return false;
} else {
if (!ValueEq(val1, val2, TypeInfo_Get(f))) return false;
@ -239,7 +256,7 @@ static bool MessageEq(const upb_msg *m1, const upb_msg *m2, const upb_msgdef *m)
static int Message_compare_objects(zval *m1, zval *m2) {
Message* intern1 = (Message*)Z_OBJ_P(m1);
Message* intern2 = (Message*)Z_OBJ_P(m2);
const upb_msgdef *m = intern1->desc->msgdef;
const upb_MessageDef *m = intern1->desc->msgdef;
if (intern2->desc->msgdef != m) return 1;
@ -268,19 +285,19 @@ static int Message_has_property(PROTO_VAL *obj, PROTO_STR *member,
int has_set_exists,
void **cache_slot) {
Message* intern = PROTO_VAL_P(obj);
const upb_fielddef *f = get_field(intern, member);
const upb_FieldDef *f = get_field(intern, member);
if (!f) return 0;
if (!upb_fielddef_haspresence(f)) {
if (!upb_FieldDef_HasPresence(f)) {
zend_throw_exception_ex(
NULL, 0,
"Cannot call isset() on field %s which does not have presence.",
upb_fielddef_name(f));
upb_FieldDef_Name(f));
return 0;
}
return upb_msg_has(intern->msg, f);
return upb_Message_Has(intern->msg, f);
}
/**
@ -302,19 +319,19 @@ static int Message_has_property(PROTO_VAL *obj, PROTO_STR *member,
static void Message_unset_property(PROTO_VAL *obj, PROTO_STR *member,
void **cache_slot) {
Message* intern = PROTO_VAL_P(obj);
const upb_fielddef *f = get_field(intern, member);
const upb_FieldDef *f = get_field(intern, member);
if (!f) return;
if (!upb_fielddef_haspresence(f)) {
if (!upb_FieldDef_HasPresence(f)) {
zend_throw_exception_ex(
NULL, 0,
"Cannot call unset() on field %s which does not have presence.",
upb_fielddef_name(f));
upb_FieldDef_Name(f));
return;
}
upb_msg_clearfield(intern->msg, f);
upb_Message_ClearField(intern->msg, f);
}
@ -339,7 +356,7 @@ static void Message_unset_property(PROTO_VAL *obj, PROTO_STR *member,
static zval *Message_read_property(PROTO_VAL *obj, PROTO_STR *member,
int type, void **cache_slot, zval *rv) {
Message* intern = PROTO_VAL_P(obj);
const upb_fielddef *f = get_field(intern, member);
const upb_FieldDef *f = get_field(intern, member);
if (!f) return &EG(uninitialized_zval);
Message_get(intern, f, rv);
@ -370,7 +387,7 @@ static zval *Message_read_property(PROTO_VAL *obj, PROTO_STR *member,
static PROTO_RETURN_VAL Message_write_property(
PROTO_VAL *obj, PROTO_STR *member, zval *val, void **cache_slot) {
Message* intern = PROTO_VAL_P(obj);
const upb_fielddef *f = get_field(intern, member);
const upb_FieldDef *f = get_field(intern, member);
if (f && Message_set(intern, f, val)) {
#if PHP_VERSION_ID < 70400
@ -409,11 +426,11 @@ static zval *Message_get_property_ptr_ptr(PROTO_VAL *object, PROTO_STR *member,
*/
static zend_object *Message_clone_obj(PROTO_VAL *object) {
Message* intern = PROTO_VAL_P(object);
upb_msg *clone = upb_msg_new(intern->desc->msgdef, Arena_Get(&intern->arena));
upb_Message *clone = upb_Message_New(intern->desc->msgdef, Arena_Get(&intern->arena));
// TODO: copy unknown fields?
// TODO: use official upb msg copy function
memcpy(clone, intern->msg, upb_msgdef_layout(intern->desc->msgdef)->size);
memcpy(clone, intern->msg, upb_MessageDef_MiniTable(intern->desc->msgdef)->size);
zval ret;
Message_GetPhpWrapper(&ret, intern->desc, clone, &intern->arena);
return Z_OBJ_P(&ret);
@ -433,7 +450,7 @@ static HashTable *Message_get_properties(PROTO_VAL *object) {
// These are documented in the header file.
void Message_GetPhpWrapper(zval *val, const Descriptor *desc, upb_msg *msg,
void Message_GetPhpWrapper(zval *val, const Descriptor *desc, upb_Message *msg,
zval *arena) {
if (!msg) {
ZVAL_NULL(val);
@ -453,8 +470,8 @@ void Message_GetPhpWrapper(zval *val, const Descriptor *desc, upb_msg *msg,
}
}
bool Message_GetUpbMessage(zval *val, const Descriptor *desc, upb_arena *arena,
upb_msg **msg) {
bool Message_GetUpbMessage(zval *val, const Descriptor *desc, upb_Arena *arena,
upb_Message **msg) {
PBPHP_ASSERT(desc);
if (Z_ISREF_P(val)) {
@ -464,7 +481,7 @@ bool Message_GetUpbMessage(zval *val, const Descriptor *desc, upb_arena *arena,
if (Z_TYPE_P(val) == IS_OBJECT &&
instanceof_function(Z_OBJCE_P(val), desc->class_entry)) {
Message *intern = (Message*)Z_OBJ_P(val);
upb_arena_fuse(arena, Arena_Get(&intern->arena));
upb_Arena_Fuse(arena, Arena_Get(&intern->arena));
*msg = intern->msg;
return true;
} else {
@ -501,8 +518,8 @@ bool Message_GetUpbMessage(zval *val, const Descriptor *desc, upb_arena *arena,
*
* The initializer must be an array.
*/
bool Message_InitFromPhp(upb_msg *msg, const upb_msgdef *m, zval *init,
upb_arena *arena) {
bool Message_InitFromPhp(upb_Message *msg, const upb_MessageDef *m, zval *init,
upb_Arena *arena) {
HashTable* table = HASH_OF(init);
HashPosition pos;
@ -513,7 +530,7 @@ bool Message_InitFromPhp(upb_msg *msg, const upb_msgdef *m, zval *init,
if (Z_TYPE_P(init) != IS_ARRAY) {
zend_throw_exception_ex(NULL, 0,
"Initializer for a message %s must be an array.",
upb_msgdef_fullname(m));
upb_MessageDef_FullName(m));
return false;
}
@ -522,8 +539,8 @@ bool Message_InitFromPhp(upb_msg *msg, const upb_msgdef *m, zval *init,
while (true) { // Iterate over key/value pairs.
zval key;
zval *val;
const upb_fielddef *f;
upb_msgval msgval;
const upb_FieldDef *f;
upb_MessageValue msgval;
zend_hash_get_current_key_zval_ex(table, &key, &pos);
val = zend_hash_get_current_data_ex(table, &pos);
@ -534,7 +551,7 @@ bool Message_InitFromPhp(upb_msg *msg, const upb_msgdef *m, zval *init,
ZVAL_DEREF(val);
}
f = upb_msgdef_ntof(m, Z_STRVAL_P(&key), Z_STRLEN_P(&key));
f = upb_MessageDef_FindFieldByNameWithSize(m, Z_STRVAL_P(&key), Z_STRLEN_P(&key));
if (!f) {
zend_throw_exception_ex(NULL, 0,
@ -542,10 +559,10 @@ bool Message_InitFromPhp(upb_msg *msg, const upb_msgdef *m, zval *init,
return false;
}
if (upb_fielddef_ismap(f)) {
if (upb_FieldDef_IsMap(f)) {
msgval.map_val = MapField_GetUpbMap(val, MapType_Get(f), arena);
if (!msgval.map_val) return false;
} else if (upb_fielddef_isseq(f)) {
} else if (upb_FieldDef_IsRepeated(f)) {
msgval.array_val = RepeatedField_GetUpbArray(val, TypeInfo_Get(f), arena);
if (!msgval.array_val) return false;
} else {
@ -554,7 +571,7 @@ bool Message_InitFromPhp(upb_msg *msg, const upb_msgdef *m, zval *init,
}
}
upb_msg_set(msg, f, msgval, arena);
upb_Message_Set(msg, f, msgval, arena);
zend_hash_move_forward_ex(table, &pos);
zval_dtor(&key);
}
@ -562,7 +579,7 @@ bool Message_InitFromPhp(upb_msg *msg, const upb_msgdef *m, zval *init,
static void Message_Initialize(Message *intern, const Descriptor *desc) {
intern->desc = desc;
intern->msg = upb_msg_new(desc->msgdef, Arena_Get(&intern->arena));
intern->msg = upb_Message_New(desc->msgdef, Arena_Get(&intern->arena));
ObjCache_Add(intern->msg, &intern->std);
}
@ -576,7 +593,7 @@ PHP_METHOD(Message, __construct) {
Message* intern = (Message*)Z_OBJ_P(getThis());
const Descriptor* desc;
zend_class_entry *ce = Z_OBJCE_P(getThis());
upb_arena *arena = Arena_Get(&intern->arena);
upb_Arena *arena = Arena_Get(&intern->arena);
zval *init_arr = NULL;
// This descriptor should always be available, as the generated __construct
@ -617,7 +634,7 @@ PHP_METHOD(Message, __construct) {
*/
PHP_METHOD(Message, discardUnknownFields) {
Message* intern = (Message*)Z_OBJ_P(getThis());
upb_msg_discardunknown(intern->msg, intern->desc->msgdef, 64);
upb_Message_DiscardUnknown(intern->msg, intern->desc->msgdef, 64);
}
/**
@ -627,7 +644,7 @@ PHP_METHOD(Message, discardUnknownFields) {
*/
PHP_METHOD(Message, clear) {
Message* intern = (Message*)Z_OBJ_P(getThis());
upb_msg_clear(intern->msg, intern->desc->msgdef);
upb_Message_Clear(intern->msg, intern->desc->msgdef);
}
/**
@ -639,8 +656,8 @@ PHP_METHOD(Message, clear) {
PHP_METHOD(Message, mergeFrom) {
Message* intern = (Message*)Z_OBJ_P(getThis());
Message* from;
upb_arena *arena = Arena_Get(&intern->arena);
const upb_msglayout *l = upb_msgdef_layout(intern->desc->msgdef);
upb_Arena *arena = Arena_Get(&intern->arena);
const upb_MiniTable *l = upb_MessageDef_MiniTable(intern->desc->msgdef);
zval* value;
char *pb;
size_t size;
@ -659,14 +676,15 @@ PHP_METHOD(Message, mergeFrom) {
// TODO(haberman): use a temp arena for this once we can make upb_decode()
// copy strings.
pb = upb_encode(from->msg, l, arena, &size);
pb = upb_Encode(from->msg, l, 0, arena, &size);
if (!pb) {
zend_throw_exception_ex(NULL, 0, "Max nesting exceeded");
return;
}
ok = upb_decode(pb, size, intern->msg, l, arena);
ok = upb_Decode(pb, size, intern->msg, l, NULL, 0, arena) ==
kUpb_DecodeStatus_Ok;
PBPHP_ASSERT(ok);
}
@ -681,8 +699,8 @@ PHP_METHOD(Message, mergeFromString) {
char *data = NULL;
char *data_copy = NULL;
zend_long data_len;
const upb_msglayout *l = upb_msgdef_layout(intern->desc->msgdef);
upb_arena *arena = Arena_Get(&intern->arena);
const upb_MiniTable *l = upb_MessageDef_MiniTable(intern->desc->msgdef);
upb_Arena *arena = Arena_Get(&intern->arena);
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &data, &data_len) ==
FAILURE) {
@ -690,10 +708,11 @@ PHP_METHOD(Message, mergeFromString) {
}
// TODO(haberman): avoid this copy when we can make the decoder copy.
data_copy = upb_arena_malloc(arena, data_len);
data_copy = upb_Arena_Malloc(arena, data_len);
memcpy(data_copy, data, data_len);
if (!upb_decode(data_copy, data_len, intern->msg, l, arena)) {
if (upb_Decode(data_copy, data_len, intern->msg, l, NULL, 0, arena) !=
kUpb_DecodeStatus_Ok) {
zend_throw_exception_ex(NULL, 0, "Error occurred during parsing");
return;
}
@ -707,21 +726,21 @@ PHP_METHOD(Message, mergeFromString) {
*/
PHP_METHOD(Message, serializeToString) {
Message* intern = (Message*)Z_OBJ_P(getThis());
const upb_msglayout *l = upb_msgdef_layout(intern->desc->msgdef);
upb_arena *tmp_arena = upb_arena_new();
const upb_MiniTable *l = upb_MessageDef_MiniTable(intern->desc->msgdef);
upb_Arena *tmp_arena = upb_Arena_New();
char *data;
size_t size;
data = upb_encode(intern->msg, l, tmp_arena, &size);
data = upb_Encode(intern->msg, l, 0, tmp_arena, &size);
if (!data) {
zend_throw_exception_ex(NULL, 0, "Error occurred during serialization");
upb_arena_free(tmp_arena);
upb_Arena_Free(tmp_arena);
return;
}
RETVAL_STRINGL(data, size);
upb_arena_free(tmp_arena);
upb_Arena_Free(tmp_arena);
}
/**
@ -735,8 +754,8 @@ PHP_METHOD(Message, mergeFromJsonString) {
char *data = NULL;
char *data_copy = NULL;
zend_long data_len;
upb_arena *arena = Arena_Get(&intern->arena);
upb_status status;
upb_Arena *arena = Arena_Get(&intern->arena);
upb_Status status;
zend_bool ignore_json_unknown = false;
int options = 0;
@ -746,20 +765,20 @@ PHP_METHOD(Message, mergeFromJsonString) {
}
// TODO(haberman): avoid this copy when we can make the decoder copy.
data_copy = upb_arena_malloc(arena, data_len + 1);
data_copy = upb_Arena_Malloc(arena, data_len + 1);
memcpy(data_copy, data, data_len);
data_copy[data_len] = '\0';
if (ignore_json_unknown) {
options |= UPB_JSONDEC_IGNOREUNKNOWN;
options |= upb_JsonDecode_IgnoreUnknown;
}
upb_status_clear(&status);
if (!upb_json_decode(data_copy, data_len, intern->msg, intern->desc->msgdef,
upb_Status_Clear(&status);
if (!upb_JsonDecode(data_copy, data_len, intern->msg, intern->desc->msgdef,
DescriptorPool_GetSymbolTable(), options, arena,
&status)) {
zend_throw_exception_ex(NULL, 0, "Error occurred during parsing: %s",
upb_status_errmsg(&status));
upb_Status_ErrorMessage(&status));
return;
}
}
@ -776,7 +795,7 @@ PHP_METHOD(Message, serializeToJsonString) {
int options = 0;
char buf[1024];
zend_bool preserve_proto_fieldnames = false;
upb_status status;
upb_Status status;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|b",
&preserve_proto_fieldnames) == FAILURE) {
@ -784,24 +803,24 @@ PHP_METHOD(Message, serializeToJsonString) {
}
if (preserve_proto_fieldnames) {
options |= UPB_JSONENC_PROTONAMES;
options |= upb_JsonEncode_UseProtoNames;
}
upb_status_clear(&status);
size = upb_json_encode(intern->msg, intern->desc->msgdef,
upb_Status_Clear(&status);
size = upb_JsonEncode(intern->msg, intern->desc->msgdef,
DescriptorPool_GetSymbolTable(), options, buf,
sizeof(buf), &status);
if (!upb_ok(&status)) {
if (!upb_Status_IsOk(&status)) {
zend_throw_exception_ex(NULL, 0,
"Error occurred during JSON serialization: %s",
upb_status_errmsg(&status));
upb_Status_ErrorMessage(&status));
return;
}
if (size >= sizeof(buf)) {
char *buf2 = malloc(size + 1);
upb_json_encode(intern->msg, intern->desc->msgdef,
upb_JsonEncode(intern->msg, intern->desc->msgdef,
DescriptorPool_GetSymbolTable(), options, buf2, size + 1,
&status);
RETVAL_STRINGL(buf2, size);
@ -827,26 +846,26 @@ PHP_METHOD(Message, serializeToJsonString) {
PHP_METHOD(Message, readWrapperValue) {
Message* intern = (Message*)Z_OBJ_P(getThis());
char* member;
const upb_fielddef *f;
const upb_FieldDef *f;
zend_long size;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &member, &size) == FAILURE) {
return;
}
f = upb_msgdef_ntof(intern->desc->msgdef, member, size);
f = upb_MessageDef_FindFieldByNameWithSize(intern->desc->msgdef, member, size);
if (!f || !upb_msgdef_iswrapper(upb_fielddef_msgsubdef(f))) {
if (!f || !IsWrapper(upb_FieldDef_MessageSubDef(f))) {
zend_throw_exception_ex(NULL, 0, "Message %s has no field %s",
upb_msgdef_fullname(intern->desc->msgdef), member);
upb_MessageDef_FullName(intern->desc->msgdef), member);
return;
}
if (upb_msg_has(intern->msg, f)) {
const upb_msg *wrapper = upb_msg_get(intern->msg, f).msg_val;
const upb_msgdef *m = upb_fielddef_msgsubdef(f);
const upb_fielddef *val_f = upb_msgdef_itof(m, 1);
upb_msgval msgval = upb_msg_get(wrapper, val_f);
if (upb_Message_Has(intern->msg, f)) {
const upb_Message *wrapper = upb_Message_Get(intern->msg, f).msg_val;
const upb_MessageDef *m = upb_FieldDef_MessageSubDef(f);
const upb_FieldDef *val_f = upb_MessageDef_FindFieldByNumber(m, 1);
upb_MessageValue msgval = upb_Message_Get(wrapper, val_f);
zval ret;
Convert_UpbToPhp(msgval, &ret, TypeInfo_Get(val_f), &intern->arena);
RETURN_COPY_VALUE(&ret);
@ -872,10 +891,10 @@ PHP_METHOD(Message, readWrapperValue) {
*/
PHP_METHOD(Message, writeWrapperValue) {
Message* intern = (Message*)Z_OBJ_P(getThis());
upb_arena *arena = Arena_Get(&intern->arena);
upb_Arena *arena = Arena_Get(&intern->arena);
char* member;
const upb_fielddef *f;
upb_msgval msgval;
const upb_FieldDef *f;
upb_MessageValue msgval;
zend_long size;
zval* val;
@ -884,11 +903,11 @@ PHP_METHOD(Message, writeWrapperValue) {
return;
}
f = upb_msgdef_ntof(intern->desc->msgdef, member, size);
f = upb_MessageDef_FindFieldByNameWithSize(intern->desc->msgdef, member, size);
if (!f || !upb_msgdef_iswrapper(upb_fielddef_msgsubdef(f))) {
if (!f || !IsWrapper(upb_FieldDef_MessageSubDef(f))) {
zend_throw_exception_ex(NULL, 0, "Message %s has no field %s",
upb_msgdef_fullname(intern->desc->msgdef), member);
upb_MessageDef_FullName(intern->desc->msgdef), member);
return;
}
@ -897,18 +916,18 @@ PHP_METHOD(Message, writeWrapperValue) {
}
if (Z_TYPE_P(val) == IS_NULL) {
upb_msg_clearfield(intern->msg, f);
upb_Message_ClearField(intern->msg, f);
} else {
const upb_msgdef *m = upb_fielddef_msgsubdef(f);
const upb_fielddef *val_f = upb_msgdef_itof(m, 1);
upb_msg *wrapper;
const upb_MessageDef *m = upb_FieldDef_MessageSubDef(f);
const upb_FieldDef *val_f = upb_MessageDef_FindFieldByNumber(m, 1);
upb_Message *wrapper;
if (!Convert_PhpToUpb(val, &msgval, TypeInfo_Get(val_f), arena)) {
return; // Error is already set.
}
wrapper = upb_msg_mutable(intern->msg, f, arena).msg;
upb_msg_set(wrapper, val_f, msgval, arena);
wrapper = upb_Message_Mutable(intern->msg, f, arena).msg;
upb_Message_Set(wrapper, val_f, msgval, arena);
}
}
@ -922,8 +941,8 @@ PHP_METHOD(Message, writeWrapperValue) {
*/
PHP_METHOD(Message, whichOneof) {
Message* intern = (Message*)Z_OBJ_P(getThis());
const upb_oneofdef* oneof;
const upb_fielddef* field;
const upb_OneofDef* oneof;
const upb_FieldDef* field;
char* name;
zend_long len;
@ -931,16 +950,16 @@ PHP_METHOD(Message, whichOneof) {
return;
}
oneof = upb_msgdef_ntoo(intern->desc->msgdef, name, len);
oneof = upb_MessageDef_FindOneofByNameWithSize(intern->desc->msgdef, name, len);
if (!oneof) {
zend_throw_exception_ex(NULL, 0, "Message %s has no oneof %s",
upb_msgdef_fullname(intern->desc->msgdef), name);
upb_MessageDef_FullName(intern->desc->msgdef), name);
return;
}
field = upb_msg_whichoneof(intern->msg, oneof);
RETURN_STRING(field ? upb_fielddef_name(field) : "");
field = upb_Message_WhichOneof(intern->msg, oneof);
RETURN_STRING(field ? upb_FieldDef_Name(field) : "");
}
/**
@ -959,21 +978,21 @@ PHP_METHOD(Message, whichOneof) {
PHP_METHOD(Message, hasOneof) {
Message* intern = (Message*)Z_OBJ_P(getThis());
zend_long field_num;
const upb_fielddef* f;
const upb_FieldDef* f;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &field_num) == FAILURE) {
return;
}
f = upb_msgdef_itof(intern->desc->msgdef, field_num);
f = upb_MessageDef_FindFieldByNumber(intern->desc->msgdef, field_num);
if (!f || !upb_fielddef_realcontainingoneof(f)) {
if (!f || !upb_FieldDef_RealContainingOneof(f)) {
php_error_docref(NULL, E_USER_ERROR,
"Internal error, no such oneof field %d\n",
(int)field_num);
}
RETVAL_BOOL(upb_msg_has(intern->msg, f));
RETVAL_BOOL(upb_Message_Has(intern->msg, f));
}
/**
@ -992,27 +1011,27 @@ PHP_METHOD(Message, hasOneof) {
PHP_METHOD(Message, readOneof) {
Message* intern = (Message*)Z_OBJ_P(getThis());
zend_long field_num;
const upb_fielddef* f;
const upb_FieldDef* f;
zval ret;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &field_num) == FAILURE) {
return;
}
f = upb_msgdef_itof(intern->desc->msgdef, field_num);
f = upb_MessageDef_FindFieldByNumber(intern->desc->msgdef, field_num);
if (!f || !upb_fielddef_realcontainingoneof(f)) {
if (!f || !upb_FieldDef_RealContainingOneof(f)) {
php_error_docref(NULL, E_USER_ERROR,
"Internal error, no such oneof field %d\n",
(int)field_num);
}
if (upb_fielddef_issubmsg(f) && !upb_msg_has(intern->msg, f)) {
if (upb_FieldDef_IsSubMessage(f) && !upb_Message_Has(intern->msg, f)) {
RETURN_NULL();
}
{
upb_msgval msgval = upb_msg_get(intern->msg, f);
upb_MessageValue msgval = upb_Message_Get(intern->msg, f);
Convert_UpbToPhp(msgval, &ret, TypeInfo_Get(f), &intern->arena);
}
@ -1042,9 +1061,9 @@ PHP_METHOD(Message, readOneof) {
PHP_METHOD(Message, writeOneof) {
Message* intern = (Message*)Z_OBJ_P(getThis());
zend_long field_num;
const upb_fielddef* f;
upb_arena *arena = Arena_Get(&intern->arena);
upb_msgval msgval;
const upb_FieldDef* f;
upb_Arena *arena = Arena_Get(&intern->arena);
upb_MessageValue msgval;
zval* val;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "lz", &field_num, &val) ==
@ -1052,16 +1071,16 @@ PHP_METHOD(Message, writeOneof) {
return;
}
f = upb_msgdef_itof(intern->desc->msgdef, field_num);
f = upb_MessageDef_FindFieldByNumber(intern->desc->msgdef, field_num);
if (upb_fielddef_issubmsg(f) && Z_TYPE_P(val) == IS_NULL) {
upb_msg_clearfield(intern->msg, f);
if (upb_FieldDef_IsSubMessage(f) && Z_TYPE_P(val) == IS_NULL) {
upb_Message_ClearField(intern->msg, f);
return;
} else if (!Convert_PhpToUpb(val, &msgval, TypeInfo_Get(f), arena)) {
return;
}
upb_msg_set(intern->msg, f, msgval, arena);
upb_Message_Set(intern->msg, f, msgval, arena);
}
ZEND_BEGIN_ARG_INFO_EX(arginfo_construct, 0, 0, 0)
@ -1108,26 +1127,27 @@ static zend_function_entry Message_methods[] = {
static const char TYPE_URL_PREFIX[] = "type.googleapis.com/";
static upb_msgval Message_getval(Message *intern, const char *field_name) {
const upb_fielddef *f = upb_msgdef_ntofz(intern->desc->msgdef, field_name);
static upb_MessageValue Message_getval(Message *intern, const char *field_name) {
const upb_FieldDef *f = upb_MessageDef_FindFieldByName(intern->desc->msgdef, field_name);
PBPHP_ASSERT(f);
return upb_msg_get(intern->msg, f);
return upb_Message_Get(intern->msg, f);
}
static void Message_setval(Message *intern, const char *field_name,
upb_msgval val) {
const upb_fielddef *f = upb_msgdef_ntofz(intern->desc->msgdef, field_name);
upb_MessageValue val) {
const upb_FieldDef *f =
upb_MessageDef_FindFieldByName(intern->desc->msgdef, field_name);
PBPHP_ASSERT(f);
return upb_msg_set(intern->msg, f, val, Arena_Get(&intern->arena));
upb_Message_Set(intern->msg, f, val, Arena_Get(&intern->arena));
}
static upb_msgval StringVal(upb_strview view) {
upb_msgval ret;
static upb_MessageValue StringVal(upb_StringView view) {
upb_MessageValue ret;
ret.str_val = view;
return ret;
}
static bool TryStripUrlPrefix(upb_strview *str) {
static bool TryStripUrlPrefix(upb_StringView *str) {
size_t size = strlen(TYPE_URL_PREFIX);
if (str->size < size || memcmp(TYPE_URL_PREFIX, str->data, size) != 0) {
return false;
@ -1137,17 +1157,17 @@ static bool TryStripUrlPrefix(upb_strview *str) {
return true;
}
static bool StrViewEq(upb_strview view, const char *str) {
static bool StrViewEq(upb_StringView view, const char *str) {
size_t size = strlen(str);
return view.size == size && memcmp(view.data, str, size) == 0;
}
PHP_METHOD(google_protobuf_Any, unpack) {
Message* intern = (Message*)Z_OBJ_P(getThis());
upb_strview type_url = Message_getval(intern, "type_url").str_val;
upb_strview value = Message_getval(intern, "value").str_val;
upb_symtab *symtab = DescriptorPool_GetSymbolTable();
const upb_msgdef *m;
upb_StringView type_url = Message_getval(intern, "type_url").str_val;
upb_StringView value = Message_getval(intern, "value").str_val;
upb_DefPool *symtab = DescriptorPool_GetSymbolTable();
const upb_MessageDef *m;
Descriptor *desc;
zval ret;
@ -1159,7 +1179,7 @@ PHP_METHOD(google_protobuf_Any, unpack) {
return;
}
m = upb_symtab_lookupmsg2(symtab, type_url.data, type_url.size);
m = upb_DefPool_FindMessageByNameWithSize(symtab, type_url.data, type_url.size);
if (m == NULL) {
zend_throw_exception(
@ -1176,26 +1196,27 @@ PHP_METHOD(google_protobuf_Any, unpack) {
ZVAL_OBJ(&ret, obj);
// Get value.
if (!upb_decode(value.data, value.size, msg->msg,
upb_msgdef_layout(desc->msgdef), Arena_Get(&msg->arena))) {
if (upb_Decode(value.data, value.size, msg->msg,
upb_MessageDef_MiniTable(desc->msgdef), NULL, 0,
Arena_Get(&msg->arena)) != kUpb_DecodeStatus_Ok) {
zend_throw_exception_ex(NULL, 0, "Error occurred during parsing");
zval_dtor(&ret);
return;
}
// Fuse since the parsed message could alias "value".
upb_arena_fuse(Arena_Get(&intern->arena), Arena_Get(&msg->arena));
upb_Arena_Fuse(Arena_Get(&intern->arena), Arena_Get(&msg->arena));
RETURN_COPY_VALUE(&ret);
}
PHP_METHOD(google_protobuf_Any, pack) {
Message* intern = (Message*)Z_OBJ_P(getThis());
upb_arena *arena = Arena_Get(&intern->arena);
upb_Arena *arena = Arena_Get(&intern->arena);
zval *val;
Message *msg;
upb_strview value;
upb_strview type_url;
upb_StringView value;
upb_StringView type_url;
const char *full_name;
char *buf;
@ -1212,14 +1233,14 @@ PHP_METHOD(google_protobuf_Any, pack) {
msg = (Message*)Z_OBJ_P(val);
// Serialize and set value.
value.data = upb_encode(msg->msg, upb_msgdef_layout(msg->desc->msgdef), arena,
&value.size);
value.data = upb_Encode(msg->msg, upb_MessageDef_MiniTable(msg->desc->msgdef),
0, arena, &value.size);
Message_setval(intern, "value", StringVal(value));
// Set type url: type_url_prefix + fully_qualified_name
full_name = upb_msgdef_fullname(msg->desc->msgdef);
full_name = upb_MessageDef_FullName(msg->desc->msgdef);
type_url.size = strlen(TYPE_URL_PREFIX) + strlen(full_name);
buf = upb_arena_malloc(arena, type_url.size + 1);
buf = upb_Arena_Malloc(arena, type_url.size + 1);
memcpy(buf, TYPE_URL_PREFIX, strlen(TYPE_URL_PREFIX));
memcpy(buf + strlen(TYPE_URL_PREFIX), full_name, strlen(full_name));
type_url.data = buf;
@ -1228,9 +1249,9 @@ PHP_METHOD(google_protobuf_Any, pack) {
PHP_METHOD(google_protobuf_Any, is) {
Message* intern = (Message*)Z_OBJ_P(getThis());
upb_strview type_url = Message_getval(intern, "type_url").str_val;
upb_StringView type_url = Message_getval(intern, "type_url").str_val;
zend_class_entry *klass = NULL;
const upb_msgdef *m;
const upb_MessageDef *m;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "C", &klass) ==
FAILURE) {
@ -1244,7 +1265,7 @@ PHP_METHOD(google_protobuf_Any, is) {
}
RETURN_BOOL(TryStripUrlPrefix(&type_url) &&
StrViewEq(type_url, upb_msgdef_fullname(m)));
StrViewEq(type_url, upb_MessageDef_FullName(m)));
}
PHP_METHOD(google_protobuf_Timestamp, fromDateTime) {
@ -1266,7 +1287,7 @@ PHP_METHOD(google_protobuf_Timestamp, fromDateTime) {
return;
}
upb_msgval timestamp_seconds;
upb_MessageValue timestamp_seconds;
{
zval retval;
zval function_name;
@ -1276,7 +1297,7 @@ PHP_METHOD(google_protobuf_Timestamp, fromDateTime) {
if (call_user_function(EG(function_table), NULL, &function_name, &retval, 1,
datetime) == FAILURE ||
!Convert_PhpToUpb(&retval, &timestamp_seconds,
TypeInfo_FromType(UPB_TYPE_INT64), NULL)) {
TypeInfo_FromType(kUpb_CType_Int64), NULL)) {
zend_error(E_ERROR, "Cannot get timestamp from DateTime.");
return;
}
@ -1285,7 +1306,7 @@ PHP_METHOD(google_protobuf_Timestamp, fromDateTime) {
zval_dtor(&function_name);
}
upb_msgval timestamp_nanos;
upb_MessageValue timestamp_nanos;
{
zval retval;
zval function_name;
@ -1302,7 +1323,7 @@ PHP_METHOD(google_protobuf_Timestamp, fromDateTime) {
if (call_user_function(EG(function_table), NULL, &function_name, &retval, 2,
params) == FAILURE ||
!Convert_PhpToUpb(&retval, &timestamp_nanos,
TypeInfo_FromType(UPB_TYPE_INT32), NULL)) {
TypeInfo_FromType(kUpb_CType_Int32), NULL)) {
zend_error(E_ERROR, "Cannot format DateTime.");
return;
}
@ -1322,8 +1343,8 @@ PHP_METHOD(google_protobuf_Timestamp, fromDateTime) {
PHP_METHOD(google_protobuf_Timestamp, toDateTime) {
Message* intern = (Message*)Z_OBJ_P(getThis());
upb_msgval seconds = Message_getval(intern, "seconds");
upb_msgval nanos = Message_getval(intern, "nanos");
upb_MessageValue seconds = Message_getval(intern, "seconds");
upb_MessageValue nanos = Message_getval(intern, "nanos");
// Get formatted time string.
char formatted_time[32];

View File

@ -38,24 +38,24 @@
// Registers the PHP Message class.
void Message_ModuleInit();
// Gets a upb_msg* for the PHP object |val|, which must either be a Message
// Gets a upb_Message* for the PHP object |val|, which must either be a Message
// object or 'null'. Returns true and stores the message in |msg| if the
// conversion succeeded (we can't return upb_msg* because null->NULL is a valid
// conversion succeeded (we can't return upb_Message* because null->NULL is a valid
// conversion). Returns false and raises a PHP error if this isn't a Message
// object or null, or if the Message object doesn't match this Descriptor.
//
// The given |arena| will be fused to this message's arena.
bool Message_GetUpbMessage(zval *val, const Descriptor *desc, upb_arena *arena,
upb_msg **msg);
bool Message_GetUpbMessage(zval *val, const Descriptor *desc, upb_Arena *arena,
upb_Message **msg);
// Gets or creates a PHP Message object to wrap the given upb_msg* and |desc|
// Gets or creates a PHP Message object to wrap the given upb_Message* and |desc|
// and returns it in |val|. The PHP object will keep a reference to this |arena|
// to ensure the underlying message data stays alive.
//
// If |msg| is NULL, this will return a PHP null.
void Message_GetPhpWrapper(zval *val, const Descriptor *desc, upb_msg *msg,
void Message_GetPhpWrapper(zval *val, const Descriptor *desc, upb_Message *msg,
zval *arena);
bool ValueEq(upb_msgval val1, upb_msgval val2, TypeInfo type);
bool ValueEq(upb_MessageValue val1, upb_MessageValue val2, TypeInfo type);
#endif // PHP_PROTOBUF_MESSAGE_H_

View File

@ -208,14 +208,29 @@ static void fill_classname(const char *fullname,
}
}
char *GetPhpClassname(const upb_filedef *file, const char *fullname) {
// Returns a newly malloc()'d, NUL-terminated copy of the bytes in |str|.
// The caller owns the result and must release it with free().
// Returns NULL if the allocation fails.
char *str_view_dup(upb_StringView str) {
  char *ret = malloc(str.size + 1);
  if (!ret) return NULL;
  // memcpy() requires valid pointers even for a zero-length copy, so skip it
  // for an empty view, whose data pointer is not guaranteed to be usable.
  if (str.size > 0) memcpy(ret, str.data, str.size);
  ret[str.size] = '\0';
  return ret;
}
char *GetPhpClassname(const upb_FileDef *file, const char *fullname) {
// Prepend '.' to package name to make it absolute. In the 5 additional
// bytes allocated, one for '.', one for trailing 0, and 3 for 'GPB' if
// given message is google.protobuf.Empty.
const char *package = upb_filedef_package(file);
const char *php_namespace = upb_filedef_phpnamespace(file);
const char *prefix = upb_filedef_phpprefix(file);
const google_protobuf_FileOptions* opts = upb_FileDef_Options(file);
const char *package = upb_FileDef_Package(file);
char *php_namespace =
google_protobuf_FileOptions_has_php_namespace(opts)
? str_view_dup(google_protobuf_FileOptions_php_namespace(opts))
: NULL;
char *prefix =
google_protobuf_FileOptions_has_php_class_prefix(opts)
? str_view_dup(google_protobuf_FileOptions_php_class_prefix(opts))
: NULL;
char *ret;
stringsink namesink;
stringsink_init(&namesink);
@ -224,5 +239,7 @@ char *GetPhpClassname(const upb_filedef *file, const char *fullname) {
stringsink_string(&namesink, "\0", 1);
ret = strdup(namesink.ptr);
stringsink_uninit(&namesink);
free(php_namespace);
free(prefix);
return ret;
}

View File

@ -35,6 +35,6 @@
// Translates a protobuf symbol name (eg. foo.bar.Baz) into a PHP class name
// (eg. \Foo\Bar\Baz).
char *GetPhpClassname(const upb_filedef *file, const char *fullname);
char *GetPhpClassname(const upb_FileDef *file, const char *fullname);
#endif // PHP_PROTOBUF_NAMES_H_

View File

@ -46,6 +46,9 @@
<file baseinstalldir="/" name="protobuf.c" role="src"/>
<file baseinstalldir="/" name="protobuf.h" role="src"/>
<file baseinstalldir="/" name="wkt.inc" role="src"/>
<file baseinstalldir="/" name="third_party/utf8_range/naive.c" role="doc"/>
<file baseinstalldir="/" name="third_party/utf8_range/range2-neon.c" role="doc"/>
<file baseinstalldir="/" name="third_party/utf8_range/range2-sse.c" role="doc"/>
<file baseinstalldir="/" name="LICENSE" role="doc"/>
</dir>
</contents>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -62,11 +62,11 @@ ZEND_BEGIN_MODULE_GLOBALS(protobuf)
// that all descriptors are loaded from the main thread.
zval generated_pool;
// A upb_symtab that we are saving for the next request so that we don't have
// A upb_DefPool that we are saving for the next request so that we don't have
// to rebuild it from scratch. When keep_descriptor_pool_after_request==true,
// we steal the upb_symtab from the global DescriptorPool object just before
// we steal the upb_DefPool from the global DescriptorPool object just before
// destroying it.
upb_symtab *global_symtab;
upb_DefPool *global_symtab;
// Object cache (see interface in protobuf.h).
HashTable object_cache;
@ -85,7 +85,7 @@ ZEND_END_MODULE_GLOBALS(protobuf)
void free_protobuf_globals(zend_protobuf_globals *globals) {
zend_hash_destroy(&globals->name_msg_cache);
zend_hash_destroy(&globals->name_enum_cache);
upb_symtab_free(globals->global_symtab);
upb_DefPool_Free(globals->global_symtab);
globals->global_symtab = NULL;
}
@ -171,9 +171,9 @@ static PHP_GINIT_FUNCTION(protobuf) {
static PHP_RINIT_FUNCTION(protobuf) {
// Create the global generated pool.
// Reuse the symtab (if any) left to us by the last request.
upb_symtab *symtab = PROTOBUF_G(global_symtab);
upb_DefPool *symtab = PROTOBUF_G(global_symtab);
if (!symtab) {
PROTOBUF_G(global_symtab) = symtab = upb_symtab_new();
PROTOBUF_G(global_symtab) = symtab = upb_DefPool_New();
zend_hash_init(&PROTOBUF_G(name_msg_cache), 64, NULL, NULL, 0);
zend_hash_init(&PROTOBUF_G(name_enum_cache), 64, NULL, NULL, 0);
}
@ -246,20 +246,20 @@ bool ObjCache_Get(const void *upb_obj, zval *val) {
// Name Cache.
// -----------------------------------------------------------------------------
void NameMap_AddMessage(const upb_msgdef *m) {
char *k = GetPhpClassname(upb_msgdef_file(m), upb_msgdef_fullname(m));
void NameMap_AddMessage(const upb_MessageDef *m) {
char *k = GetPhpClassname(upb_MessageDef_File(m), upb_MessageDef_FullName(m));
zend_hash_str_add_ptr(&PROTOBUF_G(name_msg_cache), k, strlen(k), (void*)m);
free(k);
}
void NameMap_AddEnum(const upb_enumdef *e) {
char *k = GetPhpClassname(upb_enumdef_file(e), upb_enumdef_fullname(e));
void NameMap_AddEnum(const upb_EnumDef *e) {
char *k = GetPhpClassname(upb_EnumDef_File(e), upb_EnumDef_FullName(e));
zend_hash_str_add_ptr(&PROTOBUF_G(name_enum_cache), k, strlen(k), (void*)e);
free(k);
}
const upb_msgdef *NameMap_GetMessage(zend_class_entry *ce) {
const upb_msgdef *ret =
const upb_MessageDef *NameMap_GetMessage(zend_class_entry *ce) {
const upb_MessageDef *ret =
zend_hash_find_ptr(&PROTOBUF_G(name_msg_cache), ce->name);
if (!ret && ce->create_object) {
@ -282,8 +282,8 @@ const upb_msgdef *NameMap_GetMessage(zend_class_entry *ce) {
return ret;
}
const upb_enumdef *NameMap_GetEnum(zend_class_entry *ce) {
const upb_enumdef *ret =
const upb_EnumDef *NameMap_GetEnum(zend_class_entry *ce) {
const upb_EnumDef *ret =
zend_hash_find_ptr(&PROTOBUF_G(name_enum_cache), ce->name);
return ret;
}

View File

@ -131,12 +131,12 @@ ZEND_END_ARG_INFO()
// ptr -> PHP object cache. This is a weak map that caches lazily-created
// wrapper objects around upb types:
// * upb_msg* -> Message
// * upb_array* -> RepeatedField
// * upb_map*, -> MapField
// * upb_msgdef* -> Descriptor
// * upb_enumdef* -> EnumDescriptor
// * upb_msgdef* -> Descriptor
// * upb_Message* -> Message
// * upb_Array* -> RepeatedField
// * upb_Map*, -> MapField
// * upb_MessageDef* -> Descriptor
// * upb_EnumDef* -> EnumDescriptor
// * upb_MessageDef* -> Descriptor
//
// Each wrapped object should add itself to the map when it is constructed, and
// remove itself from the map when it is destroyed. This is how we ensure that
@ -149,12 +149,12 @@ bool ObjCache_Get(const void *key, zval *val);
// PHP class name map. This is necessary because the pb_name->php_class_name
// transformation is non-reversible, so when we need to look up a msgdef or
// enumdef by PHP class, we can't turn the class name into a pb_name.
// * php_class_name -> upb_msgdef*
// * php_class_name -> upb_enumdef*
void NameMap_AddMessage(const upb_msgdef *m);
void NameMap_AddEnum(const upb_enumdef *m);
const upb_msgdef *NameMap_GetMessage(zend_class_entry *ce);
const upb_enumdef *NameMap_GetEnum(zend_class_entry *ce);
// * php_class_name -> upb_MessageDef*
// * php_class_name -> upb_EnumDef*
void NameMap_AddMessage(const upb_MessageDef *m);
void NameMap_AddEnum(const upb_EnumDef *m);
const upb_MessageDef *NameMap_GetMessage(zend_class_entry *ce);
const upb_EnumDef *NameMap_GetEnum(zend_class_entry *ce);
// Add this descriptor object to the global list of descriptors that will be
// kept alive for the duration of the request but destroyed when the request

File diff suppressed because it is too large Load Diff

View File

@ -698,6 +698,16 @@ class EncodeDecodeTest extends TestBase
$m->mergeFromString(hex2bin('7A01'));
}
/**
 * A string field populated from a JSON "\u1000" escape must read back
 * identically after a binary serialize/parse round trip.
 */
public function testEncodeDecodeValidUtf8()
{
    $source = new TestMessage();
    $source->mergeFromJsonString("{\"optionalString\":\"\\u1000\"}");

    $roundTripped = new TestMessage();
    $roundTripped->mergeFromString($source->serializeToString());

    $this->assertSame(
        $source->getOptionalString(),
        $roundTripped->getOptionalString()
    );
}
public function testDecodeInvalidEnum()
{
$this->expectException(Exception::class);

View File

@ -2,9 +2,18 @@
set -e
cd $(dirname $0)
cd $(dirname $0)/..
pushd ../ext/google/protobuf > /dev/null
# utf8_range has to live in the base third_party directory.
# We copy it into the ext/google/protobuf directory for the build
# (and for the release to PECL).
rm -rf ext/google/protobuf/third_party
mkdir -p ext/google/protobuf/third_party/utf8_range
cp ../third_party/utf8_range/* ext/google/protobuf/third_party/utf8_range
echo "Copied utf8_range from ../third_party -> ext/google/protobuf/third_party"
pushd ext/google/protobuf > /dev/null
CONFIGURE_OPTIONS=("./configure" "--with-php-config=$(which php-config)")

View File

@ -84,7 +84,9 @@ else
# We need utf8_range in-tree.
FileUtils.mkdir_p("ext/google/protobuf_c/third_party/utf8_range")
FileUtils.cp("../third_party/utf8_range/utf8_range.h", "ext/google/protobuf_c/third_party/utf8_range")
FileUtils.cp("../third_party/utf8_range/utf8_range.c", "ext/google/protobuf_c/third_party/utf8_range")
FileUtils.cp("../third_party/utf8_range/naive.c", "ext/google/protobuf_c/third_party/utf8_range")
FileUtils.cp("../third_party/utf8_range/range2-neon.c", "ext/google/protobuf_c/third_party/utf8_range")
FileUtils.cp("../third_party/utf8_range/range2-sse.c", "ext/google/protobuf_c/third_party/utf8_range")
FileUtils.cp("../third_party/utf8_range/LICENSE", "ext/google/protobuf_c/third_party/utf8_range")
end

View File

@ -23,6 +23,6 @@ $INCFLAGS << "$(srcdir)/third_party/utf8_range"
$srcs = ["protobuf.c", "convert.c", "defs.c", "message.c",
"repeated_field.c", "map.c", "ruby-upb.c", "wrap_memcpy.c",
"utf8_range.c"]
"naive.c", "range2-neon.c", "range2-sse.c"]
create_makefile(ext_name)

View File

@ -658,5 +658,13 @@ module BasicTest
assert_equal str, m.optional_string
assert_equal str, m.optional_bytes
end
# A message holding a non-ASCII string must compare equal to itself after
# being encoded and decoded again.
def test_utf8
  original = proto_module::TestMessage.new(optional_string: "µpb")
  wire_bytes = proto_module::TestMessage.encode(original)
  round_tripped = proto_module::TestMessage.decode(wire_bytes)
  assert_equal round_tripped, original
end
end
end

View File

@ -1870,44 +1870,45 @@ void GenerateCEnum(const EnumDescriptor* desc, io::Printer* printer) {
"\n"
"PHP_METHOD($c_name$, name) {\n"
" $file_c_name$_AddDescriptor();\n"
" const upb_symtab *symtab = DescriptorPool_GetSymbolTable();\n"
" const upb_enumdef *e = upb_symtab_lookupenum(symtab, \"$name$\");\n"
" const char *name;\n"
" const upb_DefPool *symtab = DescriptorPool_GetSymbolTable();\n"
" const upb_EnumDef *e = upb_DefPool_FindEnumByName(symtab, \"$name$\");\n"
" zend_long value;\n"
" if (zend_parse_parameters(ZEND_NUM_ARGS(), \"l\", &value) ==\n"
" FAILURE) {\n"
" return;\n"
" }\n"
" name = upb_enumdef_iton(e, value);\n"
" if (!name) {\n"
" const upb_EnumValueDef* ev =\n"
" upb_EnumDef_FindValueByNumber(e, value);\n"
" if (!ev) {\n"
" zend_throw_exception_ex(NULL, 0,\n"
" \"$php_name$ has no name \"\n"
" \"defined for value \" ZEND_LONG_FMT \".\",\n"
" value);\n"
" return;\n"
" }\n"
" RETURN_STRING(name);\n"
" RETURN_STRING(upb_EnumValueDef_Name(ev));\n"
"}\n"
"\n"
"PHP_METHOD($c_name$, value) {\n"
" $file_c_name$_AddDescriptor();\n"
" const upb_symtab *symtab = DescriptorPool_GetSymbolTable();\n"
" const upb_enumdef *e = upb_symtab_lookupenum(symtab, \"$name$\");\n"
" const upb_DefPool *symtab = DescriptorPool_GetSymbolTable();\n"
" const upb_EnumDef *e = upb_DefPool_FindEnumByName(symtab, \"$name$\");\n"
" char *name = NULL;\n"
" size_t name_len;\n"
" int32_t num;\n"
" if (zend_parse_parameters(ZEND_NUM_ARGS(), \"s\", &name,\n"
" &name_len) == FAILURE) {\n"
" return;\n"
" }\n"
" if (!upb_enumdef_ntoi(e, name, name_len, &num)) {\n"
" const upb_EnumValueDef* ev = upb_EnumDef_FindValueByNameWithSize(\n"
" e, name, name_len);\n"
" if (!ev) {\n"
" zend_throw_exception_ex(NULL, 0,\n"
" \"$php_name$ has no value \"\n"
" \"defined for name %s.\",\n"
" name);\n"
" return;\n"
" }\n"
" RETURN_LONG(num);\n"
" RETURN_LONG(upb_EnumValueDef_Number(ev));\n"
"}\n"
"\n"
"static zend_function_entry $c_name$_phpmethods[] = {\n"
@ -1966,8 +1967,8 @@ void GenerateCMessage(const Descriptor* message, io::Printer* printer) {
printer->Print(
"static PHP_METHOD($c_name$, get$camel_name$) {\n"
" Message* intern = (Message*)Z_OBJ_P(getThis());\n"
" const upb_fielddef *f = upb_msgdef_ntofz(intern->desc->msgdef,\n"
" \"$name$\");\n"
" const upb_FieldDef *f = upb_MessageDef_FindFieldByName(\n"
" intern->desc->msgdef, \"$name$\");\n"
" zval ret;\n"
" Message_get(intern, f, &ret);\n"
" RETURN_COPY_VALUE(&ret);\n"
@ -1975,8 +1976,8 @@ void GenerateCMessage(const Descriptor* message, io::Printer* printer) {
"\n"
"static PHP_METHOD($c_name$, set$camel_name$) {\n"
" Message* intern = (Message*)Z_OBJ_P(getThis());\n"
" const upb_fielddef *f = upb_msgdef_ntofz(intern->desc->msgdef,\n"
" \"$name$\");\n"
" const upb_FieldDef *f = upb_MessageDef_FindFieldByName(\n"
" intern->desc->msgdef, \"$name$\");\n"
" zval *val;\n"
" if (zend_parse_parameters(ZEND_NUM_ARGS(), \"z\", &val)\n"
" == FAILURE) {\n"
@ -1996,10 +1997,11 @@ void GenerateCMessage(const Descriptor* message, io::Printer* printer) {
printer->Print(
"static PHP_METHOD($c_name$, get$camel_name$) {\n"
" Message* intern = (Message*)Z_OBJ_P(getThis());\n"
" const upb_oneofdef *oneof = upb_msgdef_ntooz(intern->desc->msgdef,\n"
" \"$name$\");\n"
" const upb_fielddef *field = upb_msg_whichoneof(intern->msg, oneof);\n"
" RETURN_STRING(field ? upb_fielddef_name(field) : \"\");\n"
" const upb_OneofDef *oneof = upb_MessageDef_FindOneofByName(\n"
" intern->desc->msgdef, \"$name$\");\n"
" const upb_FieldDef *field = \n"
" upb_Message_WhichOneof(intern->msg, oneof);\n"
" RETURN_STRING(field ? upb_FieldDef_Name(field) : \"\");\n"
"}\n",
"c_name", c_name,
"name", oneof->name(),

View File

@ -1,8 +1,14 @@
# Pulled from: https://github.com/cyb70289/utf8
cc_library(
name = "utf8_range",
hdrs = ["utf8_range.h"],
srcs = ["utf8_range.c"],
srcs = [
"naive.c",
"range2-neon.c",
"range2-sse.c",
],
visibility = ["//:__pkg__"],
)

92
third_party/utf8_range/naive.c vendored Normal file
View File

@ -0,0 +1,92 @@
#include <stdio.h>
/*
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
*
* Table 3-7. Well-Formed UTF-8 Byte Sequences
*
* +--------------------+------------+-------------+------------+-------------+
* | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
* +--------------------+------------+-------------+------------+-------------+
* | U+0000..U+007F | 00..7F | | | |
* +--------------------+------------+-------------+------------+-------------+
* | U+0080..U+07FF | C2..DF | 80..BF | | |
* +--------------------+------------+-------------+------------+-------------+
* | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
* | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
* | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
*/
/*
 * Scalar validator: checks that data[0..len) is well-formed UTF-8 according
 * to the byte-sequence table above.
 *
 * data - bytes to validate (only read when len > 0)
 * len  - number of bytes in data
 *
 * Return 0 - success, >0 - index(1 based) of first error char.
 * A negative len is rejected (returns 1) instead of being walked as if it
 * were a huge length, which would read past the end of the buffer.
 */
int utf8_naive(const unsigned char *data, int len)
{
    int err_pos = 1;

    /* Fail closed: a negative length can only come from a caller-side bug. */
    if (len < 0)
        return err_pos;

    while (len > 0) {
        int bytes;
        const unsigned char byte1 = data[0];

        if (byte1 <= 0x7F) {
            /* 00..7F */
            bytes = 1;
        } else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF &&
                   (data[1] & 0xC0) == 0x80) {
            /* C2..DF, 80..BF */
            bytes = 2;
        } else if (len >= 3) {
            const unsigned char byte2 = data[1];

            /*
             * Continuation bytes are exactly 80..BF, i.e. top two bits "10".
             * Testing the mask avoids converting 0xBF to signed char, whose
             * result is implementation-defined.
             */
            const int byte2_ok = (byte2 & 0xC0) == 0x80;
            const int byte3_ok = (data[2] & 0xC0) == 0x80;

            if (byte2_ok && byte3_ok &&
                /* E0, A0..BF, 80..BF */
                ((byte1 == 0xE0 && byte2 >= 0xA0) ||
                 /* E1..EC, 80..BF, 80..BF */
                 (byte1 >= 0xE1 && byte1 <= 0xEC) ||
                 /* ED, 80..9F, 80..BF */
                 (byte1 == 0xED && byte2 <= 0x9F) ||
                 /* EE..EF, 80..BF, 80..BF */
                 (byte1 >= 0xEE && byte1 <= 0xEF))) {
                bytes = 3;
            } else if (len >= 4) {
                const int byte4_ok = (data[3] & 0xC0) == 0x80;

                if (byte2_ok && byte3_ok && byte4_ok &&
                    /* F0, 90..BF, 80..BF, 80..BF */
                    ((byte1 == 0xF0 && byte2 >= 0x90) ||
                     /* F1..F3, 80..BF, 80..BF, 80..BF */
                     (byte1 >= 0xF1 && byte1 <= 0xF3) ||
                     /* F4, 80..8F, 80..BF, 80..BF */
                     (byte1 == 0xF4 && byte2 <= 0x8F))) {
                    bytes = 4;
                } else {
                    return err_pos;
                }
            } else {
                return err_pos;
            }
        } else {
            return err_pos;
        }

        /* bytes never exceeds len here, so len cannot drop below zero. */
        len -= bytes;
        err_pos += bytes;
        data += bytes;
    }

    return 0;
}

157
third_party/utf8_range/range2-neon.c vendored Normal file
View File

@ -0,0 +1,157 @@
/*
* Process 2x16 bytes in each iteration.
* Comments removed for brevity. See range-neon.c for details.
*/
#if defined(__aarch64__) && defined(__ARM_NEON)
#include <stdio.h>
#include <stdint.h>
#include <arm_neon.h>
int utf8_naive(const unsigned char *data, int len);
/*
 * Indexed by the high nibble of a byte: how many continuation bytes must
 * follow it (0 for nibbles 0x0..0xB, 1 for 0xC/0xD, 2 for 0xE, 3 for 0xF).
 */
static const uint8_t _first_len_tbl[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
};
/*
 * Indexed by the high nibble of a byte: starting range index. Nibbles
 * 0xC..0xF (lead bytes) start at index 8, the C2..F4 entry of the min/max
 * tables; everything else starts at 0.
 */
static const uint8_t _first_range_tbl[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
};
/*
 * Indexed by range index: smallest byte value accepted for that range.
 * 0 = 00..7F, 1..3 = 80..BF, 4 = A0..BF, 5 = 80..9F, 6 = 90..BF,
 * 7 = 80..8F, 8 = C2..F4. Indices 9..15 pair min 0xFF with max 0x00, so
 * every byte that lands there is rejected.
 */
static const uint8_t _range_min_tbl[] = {
0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
/* Indexed by range index: largest byte value accepted for that range. */
static const uint8_t _range_max_tbl[] = {
0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
0xF4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
/*
 * Extra range offset selected by (previous byte - 0xE0). The 32 entries are
 * stored interleaved because utf8_range2() loads them with vld2q_u8(), which
 * puts even-indexed bytes in the first register and odd-indexed bytes in the
 * second. After that load the effective table is:
 *   index 0  (prev E0) -> 2,  index 13 (prev ED) -> 3,
 *   index 16 (prev F0) -> 3,  index 20 (prev F4) -> 4,  all others -> 0.
 */
static const uint8_t _range_adjust_tbl[] = {
2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
};
/*
 * Validate that data[0..len) is well-formed UTF-8, 32 bytes per iteration
 * using NEON.
 *
 * Inputs shorter than 32 bytes skip the vector code entirely. Otherwise the
 * vector loop consumes 32-byte chunks and the remainder (fewer than 32 bytes,
 * plus up to 3 bytes that are deliberately re-checked) goes to utf8_naive().
 *
 * Return 0 on success. Return -1 if the vector loop saw an invalid byte;
 * otherwise the result of utf8_naive() on the tail is returned unchanged, so
 * callers should treat any non-zero value as failure.
 */
int utf8_range2(const unsigned char *data, int len)
{
if (len >= 32) {
/* State carried over from the previous 16-byte block (zero at the start) */
uint8x16_t prev_input = vdupq_n_u8(0);
uint8x16_t prev_first_len = vdupq_n_u8(0);
const uint8x16_t first_len_tbl = vld1q_u8(_first_len_tbl);
const uint8x16_t first_range_tbl = vld1q_u8(_first_range_tbl);
const uint8x16_t range_min_tbl = vld1q_u8(_range_min_tbl);
const uint8x16_t range_max_tbl = vld1q_u8(_range_max_tbl);
/* vld2q_u8 de-interleaves: val[0] = even-indexed bytes, val[1] = odd */
const uint8x16x2_t range_adjust_tbl = vld2q_u8(_range_adjust_tbl);
const uint8x16_t const_1 = vdupq_n_u8(1);
const uint8x16_t const_2 = vdupq_n_u8(2);
const uint8x16_t const_e0 = vdupq_n_u8(0xE0);
/* Error accumulators; they are OR-ed together and tested after the loop */
uint8x16_t error1 = vdupq_n_u8(0);
uint8x16_t error2 = vdupq_n_u8(0);
uint8x16_t error3 = vdupq_n_u8(0);
uint8x16_t error4 = vdupq_n_u8(0);
while (len >= 32) {
/******************* two blocks interleaved **********************/
#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 8)
/* gcc doesn't support vld1q_u8_x2 until version 8 */
const uint8x16_t input_a = vld1q_u8(data);
const uint8x16_t input_b = vld1q_u8(data + 16);
#else
/* Forces a double load on Clang */
const uint8x16x2_t input_pair = vld1q_u8_x2(data);
const uint8x16_t input_a = input_pair.val[0];
const uint8x16_t input_b = input_pair.val[1];
#endif
/* Per byte: continuation count and starting range from the high nibble */
const uint8x16_t high_nibbles_a = vshrq_n_u8(input_a, 4);
const uint8x16_t high_nibbles_b = vshrq_n_u8(input_b, 4);
const uint8x16_t first_len_a =
vqtbl1q_u8(first_len_tbl, high_nibbles_a);
const uint8x16_t first_len_b =
vqtbl1q_u8(first_len_tbl, high_nibbles_b);
uint8x16_t range_a = vqtbl1q_u8(first_range_tbl, high_nibbles_a);
uint8x16_t range_b = vqtbl1q_u8(first_range_tbl, high_nibbles_b);
/* OR in the continuation count of the byte one position earlier */
range_a =
vorrq_u8(range_a, vextq_u8(prev_first_len, first_len_a, 15));
range_b =
vorrq_u8(range_b, vextq_u8(first_len_a, first_len_b, 15));
/* Same for the bytes two and three positions earlier, reduced by 1 and
 * 2 with saturation so only 3- and 4-byte leads still contribute */
uint8x16_t tmp1_a, tmp2_a, tmp1_b, tmp2_b;
tmp1_a = vextq_u8(prev_first_len, first_len_a, 14);
tmp1_a = vqsubq_u8(tmp1_a, const_1);
range_a = vorrq_u8(range_a, tmp1_a);
tmp1_b = vextq_u8(first_len_a, first_len_b, 14);
tmp1_b = vqsubq_u8(tmp1_b, const_1);
range_b = vorrq_u8(range_b, tmp1_b);
tmp2_a = vextq_u8(prev_first_len, first_len_a, 13);
tmp2_a = vqsubq_u8(tmp2_a, const_2);
range_a = vorrq_u8(range_a, tmp2_a);
tmp2_b = vextq_u8(first_len_a, first_len_b, 13);
tmp2_b = vqsubq_u8(tmp2_b, const_2);
range_b = vorrq_u8(range_b, tmp2_b);
/* Shift the range index when the previous byte is E0, ED, F0 or F4,
 * whose second byte has a narrower allowed interval */
uint8x16_t shift1_a = vextq_u8(prev_input, input_a, 15);
uint8x16_t pos_a = vsubq_u8(shift1_a, const_e0);
range_a = vaddq_u8(range_a, vqtbl2q_u8(range_adjust_tbl, pos_a));
uint8x16_t shift1_b = vextq_u8(input_a, input_b, 15);
uint8x16_t pos_b = vsubq_u8(shift1_b, const_e0);
range_b = vaddq_u8(range_b, vqtbl2q_u8(range_adjust_tbl, pos_b));
/* Look up the allowed [min, max] per byte and record any violation */
uint8x16_t minv_a = vqtbl1q_u8(range_min_tbl, range_a);
uint8x16_t maxv_a = vqtbl1q_u8(range_max_tbl, range_a);
uint8x16_t minv_b = vqtbl1q_u8(range_min_tbl, range_b);
uint8x16_t maxv_b = vqtbl1q_u8(range_max_tbl, range_b);
error1 = vorrq_u8(error1, vcltq_u8(input_a, minv_a));
error2 = vorrq_u8(error2, vcgtq_u8(input_a, maxv_a));
error3 = vorrq_u8(error3, vcltq_u8(input_b, minv_b));
error4 = vorrq_u8(error4, vcgtq_u8(input_b, maxv_b));
/************************ next iteration *************************/
prev_input = input_b;
prev_first_len = first_len_b;
data += 32;
len -= 32;
}
error1 = vorrq_u8(error1, error2);
error1 = vorrq_u8(error1, error3);
error1 = vorrq_u8(error1, error4);
if (vmaxvq_u8(error1))
return -1;
/*
 * token4 holds the last four bytes handled by the vector loop. A byte
 * compares greater than (int8_t)0xBF exactly when it is not a continuation
 * byte (80..BF). Rewind to the nearest such byte within the last three so
 * that a sequence straddling the boundary is re-validated as a whole by
 * utf8_naive().
 * NOTE(review): indexing token[3] as the final byte assumes little-endian
 * byte order -- confirm for big-endian aarch64 targets.
 */
uint32_t token4;
vst1q_lane_u32(&token4, vreinterpretq_u32_u8(prev_input), 3);
const int8_t *token = (const int8_t *)&token4;
int lookahead = 0;
if (token[3] > (int8_t)0xBF)
lookahead = 1;
else if (token[2] > (int8_t)0xBF)
lookahead = 2;
else if (token[1] > (int8_t)0xBF)
lookahead = 3;
data -= lookahead;
len += lookahead;
}
/* Scalar check of the tail (or of the whole input when len < 32) */
return utf8_naive(data, len);
}
#endif

170
third_party/utf8_range/range2-sse.c vendored Normal file
View File

@ -0,0 +1,170 @@
/*
* Process 2x16 bytes in each iteration.
* Comments removed for brevity. See range-sse.c for details.
*/
#ifdef __SSE4_1__
#include <stdio.h>
#include <stdint.h>
#include <x86intrin.h>
int utf8_naive(const unsigned char *data, int len);
/*
 * Indexed by the high nibble of a byte: how many continuation bytes must
 * follow it (0 for nibbles 0x0..0xB, 1 for 0xC/0xD, 2 for 0xE, 3 for 0xF).
 */
static const int8_t _first_len_tbl[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
};
/*
 * Indexed by the high nibble of a byte: starting range index. Nibbles
 * 0xC..0xF (lead bytes) start at index 8, the C2..F4 entry of the min/max
 * tables; everything else starts at 0.
 */
static const int8_t _first_range_tbl[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
};
/*
 * Indexed by range index: smallest byte value accepted for that range.
 * 0 = 00..7F, 1..3 = 80..BF, 4 = A0..BF, 5 = 80..9F, 6 = 90..BF,
 * 7 = 80..8F, 8 = C2..F4. utf8_range2() compares these as signed bytes, so
 * indices 9..15 (min 0x7F = 127, max 0x80 = -128) reject every byte.
 */
static const int8_t _range_min_tbl[] = {
0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
};
/* Indexed by range index: largest byte value accepted for that range. */
static const int8_t _range_max_tbl[] = {
0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
};
/*
 * Range offset for previous bytes DF..EE, indexed by (previous byte - 0xDF):
 * index 1 (prev E0) -> 2, index 14 (prev ED) -> 3.
 */
static const int8_t _df_ee_tbl[] = {
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
};
/*
 * Range offset for previous bytes EF..FE, indexed by (previous byte - 0xEF):
 * index 1 (prev F0) -> 3, index 5 (prev F4) -> 4.
 */
static const int8_t _ef_fe_tbl[] = {
0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Validate that data[0..len) is well-formed UTF-8, 32 bytes per iteration
 * using SSE4.1.
 *
 * Inputs shorter than 32 bytes skip the vector code entirely. Otherwise the
 * vector loop consumes 32-byte chunks and the remainder (fewer than 32 bytes,
 * plus up to 3 bytes that are deliberately re-checked) goes to utf8_naive().
 *
 * Return 0 on success. Return -1 if the vector loop saw an invalid byte;
 * otherwise the result of utf8_naive() on the tail is returned unchanged, so
 * callers should treat any non-zero value as failure.
 */
int utf8_range2(const unsigned char *data, int len)
{
if (len >= 32) {
/* State carried over from the previous 16-byte block (zero at the start) */
__m128i prev_input = _mm_set1_epi8(0);
__m128i prev_first_len = _mm_set1_epi8(0);
const __m128i first_len_tbl =
_mm_loadu_si128((const __m128i *)_first_len_tbl);
const __m128i first_range_tbl =
_mm_loadu_si128((const __m128i *)_first_range_tbl);
const __m128i range_min_tbl =
_mm_loadu_si128((const __m128i *)_range_min_tbl);
const __m128i range_max_tbl =
_mm_loadu_si128((const __m128i *)_range_max_tbl);
const __m128i df_ee_tbl =
_mm_loadu_si128((const __m128i *)_df_ee_tbl);
const __m128i ef_fe_tbl =
_mm_loadu_si128((const __m128i *)_ef_fe_tbl);
/* Any non-zero bit here after the loop means an invalid byte was seen */
__m128i error = _mm_set1_epi8(0);
while (len >= 32) {
/***************************** block 1 ****************************/
const __m128i input_a = _mm_loadu_si128((const __m128i *)data);
/* 16-bit shift plus mask leaves each byte's high nibble */
__m128i high_nibbles =
_mm_and_si128(_mm_srli_epi16(input_a, 4), _mm_set1_epi8(0x0F));
__m128i first_len_a = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
__m128i range_a = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
/* OR in the continuation count of the byte one position earlier */
range_a = _mm_or_si128(
range_a, _mm_alignr_epi8(first_len_a, prev_first_len, 15));
/* Same for two and three positions earlier, reduced by 1 and 2 with
 * saturation so only 3- and 4-byte leads still contribute */
__m128i tmp;
tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 14);
tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
range_a = _mm_or_si128(range_a, tmp);
tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 13);
tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
range_a = _mm_or_si128(range_a, tmp);
/* Shift the range index when the previous byte is E0, ED, F0 or F4:
 * saturating arithmetic maps DF..EE onto df_ee_tbl and EF..FE onto
 * ef_fe_tbl, and every other previous byte onto a zero entry */
__m128i shift1, pos, range2;
shift1 = _mm_alignr_epi8(input_a, prev_input, 15);
pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
range_a = _mm_add_epi8(range_a, range2);
/* Signed compare of each byte against its allowed [min, max] */
__m128i minv = _mm_shuffle_epi8(range_min_tbl, range_a);
__m128i maxv = _mm_shuffle_epi8(range_max_tbl, range_a);
tmp = _mm_or_si128(
_mm_cmplt_epi8(input_a, minv),
_mm_cmpgt_epi8(input_a, maxv)
);
error = _mm_or_si128(error, tmp);
/***************************** block 2 ****************************/
/* Same steps for the second 16 bytes, with block 1 as "previous" */
const __m128i input_b = _mm_loadu_si128((const __m128i *)(data+16));
high_nibbles =
_mm_and_si128(_mm_srli_epi16(input_b, 4), _mm_set1_epi8(0x0F));
__m128i first_len_b = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
__m128i range_b = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
range_b = _mm_or_si128(
range_b, _mm_alignr_epi8(first_len_b, first_len_a, 15));
tmp = _mm_alignr_epi8(first_len_b, first_len_a, 14);
tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
range_b = _mm_or_si128(range_b, tmp);
tmp = _mm_alignr_epi8(first_len_b, first_len_a, 13);
tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
range_b = _mm_or_si128(range_b, tmp);
shift1 = _mm_alignr_epi8(input_b, input_a, 15);
pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
range_b = _mm_add_epi8(range_b, range2);
minv = _mm_shuffle_epi8(range_min_tbl, range_b);
maxv = _mm_shuffle_epi8(range_max_tbl, range_b);
tmp = _mm_or_si128(
_mm_cmplt_epi8(input_b, minv),
_mm_cmpgt_epi8(input_b, maxv)
);
error = _mm_or_si128(error, tmp);
/************************ next iteration **************************/
prev_input = input_b;
prev_first_len = first_len_b;
data += 32;
len -= 32;
}
if (!_mm_testz_si128(error, error))
return -1;
/*
 * token4 holds the last four bytes handled by the vector loop. A byte
 * compares greater than (int8_t)0xBF exactly when it is not a continuation
 * byte (80..BF). Rewind to the nearest such byte within the last three so
 * that a sequence straddling the boundary is re-validated as a whole by
 * utf8_naive().
 */
int32_t token4 = _mm_extract_epi32(prev_input, 3);
const int8_t *token = (const int8_t *)&token4;
int lookahead = 0;
if (token[3] > (int8_t)0xBF)
lookahead = 1;
else if (token[2] > (int8_t)0xBF)
lookahead = 2;
else if (token[1] > (int8_t)0xBF)
lookahead = 3;
data -= lookahead;
len += lookahead;
}
/* Scalar check of the tail (or of the whole input when len < 32) */
return utf8_naive(data, len);
}
#endif

View File

@ -1,395 +0,0 @@
/*
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
*
* Table 3-7. Well-Formed UTF-8 Byte Sequences
*
* +--------------------+------------+-------------+------------+-------------+
* | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
* +--------------------+------------+-------------+------------+-------------+
* | U+0000..U+007F | 00..7F | | | |
* +--------------------+------------+-------------+------------+-------------+
* | U+0080..U+07FF | C2..DF | 80..BF | | |
* +--------------------+------------+-------------+------------+-------------+
* | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
* | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
* | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
*/
/*
 * Scalar validator: checks that data[0..len) is well-formed UTF-8 according
 * to the byte-sequence table above.
 *
 * data - bytes to validate (only read when len > 0)
 * len  - number of bytes in data
 *
 * Return 0 - success, >0 - index(1 based) of first error char.
 * A negative len is rejected (returns 1) instead of being walked as if it
 * were a huge length, which would read past the end of the buffer.
 */
int utf8_naive(const unsigned char* data, int len) {
  int err_pos = 1;

  /* Fail closed: a negative length can only come from a caller-side bug. */
  if (len < 0) return err_pos;

  while (len > 0) {
    int bytes;
    const unsigned char byte1 = data[0];

    if (byte1 <= 0x7F) {
      /* 00..7F */
      bytes = 1;
    } else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF &&
               (data[1] & 0xC0) == 0x80) {
      /* C2..DF, 80..BF */
      bytes = 2;
    } else if (len >= 3) {
      const unsigned char byte2 = data[1];

      /*
       * Continuation bytes are exactly 80..BF, i.e. top two bits "10".
       * Testing the mask avoids converting 0xBF to signed char, whose
       * result is implementation-defined.
       */
      const int byte2_ok = (byte2 & 0xC0) == 0x80;
      const int byte3_ok = (data[2] & 0xC0) == 0x80;

      if (byte2_ok && byte3_ok &&
          /* E0, A0..BF, 80..BF */
          ((byte1 == 0xE0 && byte2 >= 0xA0) ||
           /* E1..EC, 80..BF, 80..BF */
           (byte1 >= 0xE1 && byte1 <= 0xEC) ||
           /* ED, 80..9F, 80..BF */
           (byte1 == 0xED && byte2 <= 0x9F) ||
           /* EE..EF, 80..BF, 80..BF */
           (byte1 >= 0xEE && byte1 <= 0xEF))) {
        bytes = 3;
      } else if (len >= 4) {
        const int byte4_ok = (data[3] & 0xC0) == 0x80;

        if (byte2_ok && byte3_ok && byte4_ok &&
            /* F0, 90..BF, 80..BF, 80..BF */
            ((byte1 == 0xF0 && byte2 >= 0x90) ||
             /* F1..F3, 80..BF, 80..BF, 80..BF */
             (byte1 >= 0xF1 && byte1 <= 0xF3) ||
             /* F4, 80..8F, 80..BF, 80..BF */
             (byte1 == 0xF4 && byte2 <= 0x8F))) {
          bytes = 4;
        } else {
          return err_pos;
        }
      } else {
        return err_pos;
      }
    } else {
      return err_pos;
    }

    /* bytes never exceeds len here, so len cannot drop below zero. */
    len -= bytes;
    err_pos += bytes;
    data += bytes;
  }

  return 0;
}
#ifdef __SSE4_1__
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>
int utf8_naive(const unsigned char* data, int len);
/*
 * Indexed by the high nibble of a byte: how many continuation bytes must
 * follow it (0 for nibbles 0x0..0xB, 1 for 0xC/0xD, 2 for 0xE, 3 for 0xF).
 */
static const int8_t _first_len_tbl[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
};
/*
 * Indexed by the high nibble of a byte: starting range index. Nibbles
 * 0xC..0xF (lead bytes) start at index 8, the C2..F4 entry of the min/max
 * tables; everything else starts at 0.
 */
static const int8_t _first_range_tbl[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
};
/*
 * Indexed by range index: smallest byte value accepted for that range.
 * 0 = 00..7F, 1..3 = 80..BF, 4 = A0..BF, 5 = 80..9F, 6 = 90..BF,
 * 7 = 80..8F, 8 = C2..F4. utf8_range2() compares these as signed bytes, so
 * indices 9..15 (min 0x7F = 127, max 0x80 = -128) reject every byte.
 */
static const int8_t _range_min_tbl[] = {
0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
};
/* Indexed by range index: largest byte value accepted for that range. */
static const int8_t _range_max_tbl[] = {
0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
};
/*
 * Range offset for previous bytes DF..EE, indexed by (previous byte - 0xDF):
 * index 1 (prev E0) -> 2, index 14 (prev ED) -> 3.
 */
static const int8_t _df_ee_tbl[] = {
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
};
/*
 * Range offset for previous bytes EF..FE, indexed by (previous byte - 0xEF):
 * index 1 (prev F0) -> 3, index 5 (prev F4) -> 4.
 */
static const int8_t _ef_fe_tbl[] = {
0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Validate that data[0..len) is well-formed UTF-8, 32 bytes per iteration
 * using SSE4.1.
 *
 * Inputs shorter than 32 bytes skip the vector code entirely. Otherwise the
 * vector loop consumes 32-byte chunks and the remainder (fewer than 32 bytes,
 * plus up to 3 bytes that are deliberately re-checked) goes to utf8_naive().
 *
 * Return 0 on success. Return -1 if the vector loop saw an invalid byte;
 * otherwise the result of utf8_naive() on the tail is returned unchanged, so
 * callers should treat any non-zero value as failure.
 */
int utf8_range2(const unsigned char* data, int len) {
if (len >= 32) {
/* State carried over from the previous 16-byte block (zero at the start) */
__m128i prev_input = _mm_set1_epi8(0);
__m128i prev_first_len = _mm_set1_epi8(0);
const __m128i first_len_tbl =
_mm_loadu_si128((const __m128i*)_first_len_tbl);
const __m128i first_range_tbl =
_mm_loadu_si128((const __m128i*)_first_range_tbl);
const __m128i range_min_tbl =
_mm_loadu_si128((const __m128i*)_range_min_tbl);
const __m128i range_max_tbl =
_mm_loadu_si128((const __m128i*)_range_max_tbl);
const __m128i df_ee_tbl = _mm_loadu_si128((const __m128i*)_df_ee_tbl);
const __m128i ef_fe_tbl = _mm_loadu_si128((const __m128i*)_ef_fe_tbl);
/* Any non-zero bit here after the loop means an invalid byte was seen */
__m128i error = _mm_set1_epi8(0);
while (len >= 32) {
/***************************** block 1 ****************************/
const __m128i input_a = _mm_loadu_si128((const __m128i*)data);
/* 16-bit shift plus mask leaves each byte's high nibble */
__m128i high_nibbles =
_mm_and_si128(_mm_srli_epi16(input_a, 4), _mm_set1_epi8(0x0F));
__m128i first_len_a = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
__m128i range_a = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
/* OR in the continuation count of the byte one position earlier */
range_a = _mm_or_si128(range_a,
_mm_alignr_epi8(first_len_a, prev_first_len, 15));
/* Same for two and three positions earlier, reduced by 1 and 2 with
 * saturation so only 3- and 4-byte leads still contribute */
__m128i tmp;
tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 14);
tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
range_a = _mm_or_si128(range_a, tmp);
tmp = _mm_alignr_epi8(first_len_a, prev_first_len, 13);
tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
range_a = _mm_or_si128(range_a, tmp);
/* Shift the range index when the previous byte is E0, ED, F0 or F4:
 * saturating arithmetic maps DF..EE onto df_ee_tbl and EF..FE onto
 * ef_fe_tbl, and every other previous byte onto a zero entry */
__m128i shift1, pos, range2;
shift1 = _mm_alignr_epi8(input_a, prev_input, 15);
pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
range_a = _mm_add_epi8(range_a, range2);
/* Signed compare of each byte against its allowed [min, max] */
__m128i minv = _mm_shuffle_epi8(range_min_tbl, range_a);
__m128i maxv = _mm_shuffle_epi8(range_max_tbl, range_a);
tmp = _mm_or_si128(_mm_cmplt_epi8(input_a, minv),
_mm_cmpgt_epi8(input_a, maxv));
error = _mm_or_si128(error, tmp);
/***************************** block 2 ****************************/
/* Same steps for the second 16 bytes, with block 1 as "previous" */
const __m128i input_b = _mm_loadu_si128((const __m128i*)(data + 16));
high_nibbles =
_mm_and_si128(_mm_srli_epi16(input_b, 4), _mm_set1_epi8(0x0F));
__m128i first_len_b = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
__m128i range_b = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
range_b =
_mm_or_si128(range_b, _mm_alignr_epi8(first_len_b, first_len_a, 15));
tmp = _mm_alignr_epi8(first_len_b, first_len_a, 14);
tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(1));
range_b = _mm_or_si128(range_b, tmp);
tmp = _mm_alignr_epi8(first_len_b, first_len_a, 13);
tmp = _mm_subs_epu8(tmp, _mm_set1_epi8(2));
range_b = _mm_or_si128(range_b, tmp);
shift1 = _mm_alignr_epi8(input_b, input_a, 15);
pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
tmp = _mm_subs_epu8(pos, _mm_set1_epi8(0xF0));
range2 = _mm_shuffle_epi8(df_ee_tbl, tmp);
tmp = _mm_adds_epu8(pos, _mm_set1_epi8(0x70));
range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp));
range_b = _mm_add_epi8(range_b, range2);
minv = _mm_shuffle_epi8(range_min_tbl, range_b);
maxv = _mm_shuffle_epi8(range_max_tbl, range_b);
tmp = _mm_or_si128(_mm_cmplt_epi8(input_b, minv),
_mm_cmpgt_epi8(input_b, maxv));
error = _mm_or_si128(error, tmp);
/************************ next iteration **************************/
prev_input = input_b;
prev_first_len = first_len_b;
data += 32;
len -= 32;
}
if (!_mm_testz_si128(error, error)) return -1;
/*
 * token4 holds the last four bytes handled by the vector loop. A byte
 * compares greater than (int8_t)0xBF exactly when it is not a continuation
 * byte (80..BF). Rewind to the nearest such byte within the last three so
 * that a sequence straddling the boundary is re-validated as a whole by
 * utf8_naive().
 */
int32_t token4 = _mm_extract_epi32(prev_input, 3);
const int8_t* token = (const int8_t*)&token4;
int lookahead = 0;
if (token[3] > (int8_t)0xBF)
lookahead = 1;
else if (token[2] > (int8_t)0xBF)
lookahead = 2;
else if (token[1] > (int8_t)0xBF)
lookahead = 3;
data -= lookahead;
len += lookahead;
}
/* Scalar check of the tail (or of the whole input when len < 32) */
return utf8_naive(data, len);
}
#endif
#ifdef __ARM_NEON
#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>
int utf8_naive(const unsigned char* data, int len);
/*
 * Indexed by the high nibble of a byte: how many continuation bytes must
 * follow it (0 for nibbles 0x0..0xB, 1 for 0xC/0xD, 2 for 0xE, 3 for 0xF).
 */
static const uint8_t _first_len_tbl[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
};
/*
 * Indexed by the high nibble of a byte: starting range index. Nibbles
 * 0xC..0xF (lead bytes) start at index 8, the C2..F4 entry of the min/max
 * tables; everything else starts at 0.
 */
static const uint8_t _first_range_tbl[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8,
};
/*
 * Indexed by range index: smallest byte value accepted for that range.
 * 0 = 00..7F, 1..3 = 80..BF, 4 = A0..BF, 5 = 80..9F, 6 = 90..BF,
 * 7 = 80..8F, 8 = C2..F4. Indices 9..15 pair min 0xFF with max 0x00, so
 * every byte that lands there is rejected.
 */
static const uint8_t _range_min_tbl[] = {
0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80,
0xC2, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
/* Indexed by range index: largest byte value accepted for that range. */
static const uint8_t _range_max_tbl[] = {
0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F,
0xF4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
/*
 * Extra range offset selected by (previous byte - 0xE0). The 32 entries are
 * stored interleaved because utf8_range2() loads them with vld2q_u8(), which
 * puts even-indexed bytes in the first register and odd-indexed bytes in the
 * second. After that load the effective table is:
 *   index 0  (prev E0) -> 2,  index 13 (prev ED) -> 3,
 *   index 16 (prev F0) -> 3,  index 20 (prev F4) -> 4,  all others -> 0.
 */
static const uint8_t _range_adjust_tbl[] = {
2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
};
/*
 * Validate that data[0..len) is well-formed UTF-8, 32 bytes per iteration
 * using NEON.
 *
 * Inputs shorter than 32 bytes skip the vector code entirely. Otherwise the
 * vector loop consumes 32-byte chunks and the remainder (fewer than 32 bytes,
 * plus up to 3 bytes that are deliberately re-checked) goes to utf8_naive().
 *
 * Return 0 on success. Return -1 if the vector loop saw an invalid byte;
 * otherwise the result of utf8_naive() on the tail is returned unchanged, so
 * callers should treat any non-zero value as failure.
 */
int utf8_range2(const unsigned char* data, int len) {
if (len >= 32) {
/* State carried over from the previous 16-byte block (zero at the start) */
uint8x16_t prev_input = vdupq_n_u8(0);
uint8x16_t prev_first_len = vdupq_n_u8(0);
const uint8x16_t first_len_tbl = vld1q_u8(_first_len_tbl);
const uint8x16_t first_range_tbl = vld1q_u8(_first_range_tbl);
const uint8x16_t range_min_tbl = vld1q_u8(_range_min_tbl);
const uint8x16_t range_max_tbl = vld1q_u8(_range_max_tbl);
/* vld2q_u8 de-interleaves: val[0] = even-indexed bytes, val[1] = odd */
const uint8x16x2_t range_adjust_tbl = vld2q_u8(_range_adjust_tbl);
const uint8x16_t const_1 = vdupq_n_u8(1);
const uint8x16_t const_2 = vdupq_n_u8(2);
const uint8x16_t const_e0 = vdupq_n_u8(0xE0);
/* Error accumulators; they are OR-ed together and tested after the loop */
uint8x16_t error1 = vdupq_n_u8(0);
uint8x16_t error2 = vdupq_n_u8(0);
uint8x16_t error3 = vdupq_n_u8(0);
uint8x16_t error4 = vdupq_n_u8(0);
while (len >= 32) {
/******************* two blocks interleaved **********************/
#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 8)
/* gcc doesn't support vld1q_u8_x2 until version 8 */
const uint8x16_t input_a = vld1q_u8(data);
const uint8x16_t input_b = vld1q_u8(data + 16);
#else
/* Forces a double load on Clang */
const uint8x16x2_t input_pair = vld1q_u8_x2(data);
const uint8x16_t input_a = input_pair.val[0];
const uint8x16_t input_b = input_pair.val[1];
#endif
/* Per byte: continuation count and starting range from the high nibble */
const uint8x16_t high_nibbles_a = vshrq_n_u8(input_a, 4);
const uint8x16_t high_nibbles_b = vshrq_n_u8(input_b, 4);
const uint8x16_t first_len_a = vqtbl1q_u8(first_len_tbl, high_nibbles_a);
const uint8x16_t first_len_b = vqtbl1q_u8(first_len_tbl, high_nibbles_b);
uint8x16_t range_a = vqtbl1q_u8(first_range_tbl, high_nibbles_a);
uint8x16_t range_b = vqtbl1q_u8(first_range_tbl, high_nibbles_b);
/* OR in the continuation count of the byte one position earlier */
range_a = vorrq_u8(range_a, vextq_u8(prev_first_len, first_len_a, 15));
range_b = vorrq_u8(range_b, vextq_u8(first_len_a, first_len_b, 15));
/* Same for the bytes two and three positions earlier, reduced by 1 and
 * 2 with saturation so only 3- and 4-byte leads still contribute */
uint8x16_t tmp1_a, tmp2_a, tmp1_b, tmp2_b;
tmp1_a = vextq_u8(prev_first_len, first_len_a, 14);
tmp1_a = vqsubq_u8(tmp1_a, const_1);
range_a = vorrq_u8(range_a, tmp1_a);
tmp1_b = vextq_u8(first_len_a, first_len_b, 14);
tmp1_b = vqsubq_u8(tmp1_b, const_1);
range_b = vorrq_u8(range_b, tmp1_b);
tmp2_a = vextq_u8(prev_first_len, first_len_a, 13);
tmp2_a = vqsubq_u8(tmp2_a, const_2);
range_a = vorrq_u8(range_a, tmp2_a);
tmp2_b = vextq_u8(first_len_a, first_len_b, 13);
tmp2_b = vqsubq_u8(tmp2_b, const_2);
range_b = vorrq_u8(range_b, tmp2_b);
/* Shift the range index when the previous byte is E0, ED, F0 or F4,
 * whose second byte has a narrower allowed interval */
uint8x16_t shift1_a = vextq_u8(prev_input, input_a, 15);
uint8x16_t pos_a = vsubq_u8(shift1_a, const_e0);
range_a = vaddq_u8(range_a, vqtbl2q_u8(range_adjust_tbl, pos_a));
uint8x16_t shift1_b = vextq_u8(input_a, input_b, 15);
uint8x16_t pos_b = vsubq_u8(shift1_b, const_e0);
range_b = vaddq_u8(range_b, vqtbl2q_u8(range_adjust_tbl, pos_b));
/* Look up the allowed [min, max] per byte and record any violation */
uint8x16_t minv_a = vqtbl1q_u8(range_min_tbl, range_a);
uint8x16_t maxv_a = vqtbl1q_u8(range_max_tbl, range_a);
uint8x16_t minv_b = vqtbl1q_u8(range_min_tbl, range_b);
uint8x16_t maxv_b = vqtbl1q_u8(range_max_tbl, range_b);
error1 = vorrq_u8(error1, vcltq_u8(input_a, minv_a));
error2 = vorrq_u8(error2, vcgtq_u8(input_a, maxv_a));
error3 = vorrq_u8(error3, vcltq_u8(input_b, minv_b));
error4 = vorrq_u8(error4, vcgtq_u8(input_b, maxv_b));
/************************ next iteration *************************/
prev_input = input_b;
prev_first_len = first_len_b;
data += 32;
len -= 32;
}
error1 = vorrq_u8(error1, error2);
error1 = vorrq_u8(error1, error3);
error1 = vorrq_u8(error1, error4);
if (vmaxvq_u8(error1)) return -1;
/*
 * token4 holds the last four bytes handled by the vector loop. A byte
 * compares greater than (int8_t)0xBF exactly when it is not a continuation
 * byte (80..BF). Rewind to the nearest such byte within the last three so
 * that a sequence straddling the boundary is re-validated as a whole by
 * utf8_naive().
 * NOTE(review): indexing token[3] as the final byte assumes little-endian
 * byte order -- confirm for big-endian ARM targets.
 */
uint32_t token4;
vst1q_lane_u32(&token4, vreinterpretq_u32_u8(prev_input), 3);
const int8_t* token = (const int8_t*)&token4;
int lookahead = 0;
if (token[3] > (int8_t)0xBF)
lookahead = 1;
else if (token[2] > (int8_t)0xBF)
lookahead = 2;
else if (token[1] > (int8_t)0xBF)
lookahead = 3;
data -= lookahead;
len += lookahead;
}
/* Scalar check of the tail (or of the whole input when len < 32) */
return utf8_naive(data, len);
}
#endif

View File

@ -1,5 +1,5 @@
#if defined(__ARM_NEON) || defined(__SSE4_1__)
#if (defined(__ARM_NEON) && defined(__aarch64__)) || defined(__SSE4_1__)
int utf8_range2(const unsigned char* data, int len);
#else
int utf8_naive(const unsigned char* data, int len);