1e37a94bb5
Prior to this CL, creating an empty message object would create two empty string objects for every declared field. First we created a unique string object for the field's default. Then we created yet another string object when we assigned the default value into the message: we called #encode to ensure that the string would have the correct encoding and be frozen. I optimized these unnecessary objects away with two fixes: 1. Memoize the empty string so that we don't create a new empty string for every field's default. 2. If we are assigning a string to a message object, avoid creating a new string if the assigned string has the correct encoding and is already frozen.
153 lines
6.2 KiB
C
153 lines
6.2 KiB
C
// Protocol Buffers - Google's data interchange format
|
|
// Copyright 2014 Google Inc. All rights reserved.
|
|
// https://developers.google.com/protocol-buffers/
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following disclaimer
|
|
// in the documentation and/or other materials provided with the
|
|
// distribution.
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
#include "protobuf.h"
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
|
|
// instances.
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// This is a hash table from def objects (encoded by converting pointers to
// Ruby integers) to wrapper Descriptor/EnumDescriptor instances (as Ruby
// values).
|
|
// Assigned a fresh Ruby hash and registered with the GC in Init_protobuf_c().
VALUE upb_def_to_ruby_obj_map;
|
|
|
|
// Holds the constant Google::Protobuf::Error, looked up in Init_protobuf_c().
VALUE cError;
|
|
// Holds the constant Google::Protobuf::ParseError, looked up in
// Init_protobuf_c().
VALUE cParseError;
|
|
// Holds the constant Google::Protobuf::TypeError, looked up in
// Init_protobuf_c().
VALUE cTypeError;
|
|
|
|
// Stores |value| in the global def-to-wrapper hash under a key derived from
// the address of |def|.
//
// The pointer is turned into a Ruby integer so it can be used as a hash key;
// get_def_obj() derives its lookup key with the same conversion.
void add_def_obj(const void* def, VALUE value) {
  VALUE key = ULL2NUM((intptr_t)def);
  rb_hash_aset(upb_def_to_ruby_obj_map, key, value);
}
|
|
|
|
VALUE get_def_obj(const void* def) {
|
|
return rb_hash_aref(upb_def_to_ruby_obj_map, ULL2NUM((intptr_t)def));
|
|
}
|
|
|
|
// Frozen empty string built with binary=false in Init_protobuf_c(); handed out
// by get_frozen_string() for empty non-binary input.
static VALUE cached_empty_string = Qnil;
|
|
// Frozen empty string built with binary=true in Init_protobuf_c(); handed out
// by get_frozen_string() for empty binary input.
static VALUE cached_empty_bytes = Qnil;
|
|
|
|
static VALUE create_frozen_string(const char* str, size_t size, bool binary) {
|
|
VALUE str_rb = rb_str_new(str, size);
|
|
|
|
rb_enc_associate(str_rb,
|
|
binary ? kRubyString8bitEncoding : kRubyStringUtf8Encoding);
|
|
rb_obj_freeze(str_rb);
|
|
return str_rb;
|
|
}
|
|
|
|
VALUE get_frozen_string(const char* str, size_t size, bool binary) {
|
|
if (size == 0) {
|
|
return binary ? cached_empty_bytes : cached_empty_string;
|
|
} else {
|
|
// It is harder to memoize non-empty strings. The obvious approach would be
|
|
// to use a Ruby hash keyed by string as memo table, but looking up in such a table
|
|
// requires constructing a string (the very thing we're trying to avoid).
|
|
//
|
|
// Since few fields have defaults, we will just optimize the empty string
|
|
// case for now.
|
|
return create_frozen_string(str, size, binary);
|
|
}
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// Utilities.
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// Raises a Ruby error if |status| is not OK, using its error message.
|
|
void check_upb_status(const upb_status* status, const char* msg) {
|
|
if (!upb_ok(status)) {
|
|
rb_raise(rb_eRuntimeError, "%s: %s\n", msg, upb_status_errmsg(status));
|
|
}
|
|
}
|
|
|
|
// String encodings: we look these up once, at load time, and then cache them
|
|
// here.
|
|
// Set to rb_utf8_encoding() in Init_protobuf_c().
rb_encoding* kRubyStringUtf8Encoding;
|
|
// Set to rb_usascii_encoding() in Init_protobuf_c().
rb_encoding* kRubyStringASCIIEncoding;
|
|
// Set to rb_ascii8bit_encoding() in Init_protobuf_c().
rb_encoding* kRubyString8bitEncoding;
|
|
|
|
// Ruby-interned string: "descriptor". We use this identifier to store an
|
|
// instance variable on message classes we create in order to link them back to
|
|
// their descriptors.
|
|
//
|
|
// We intern this once at module load time then use the interned identifier at
|
|
// runtime in order to avoid the cost of repeatedly interning in hot paths.
|
|
// Name passed to rb_intern() in Init_protobuf_c() to produce
// descriptor_instancevar_interned.
const char* kDescriptorInstanceVar = "descriptor";
|
|
// ID returned by rb_intern(kDescriptorInstanceVar); assigned in
// Init_protobuf_c().
ID descriptor_instancevar_interned;
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// Initialization/entry point.
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// This must be named "Init_protobuf_c" because the Ruby module is named
|
|
// "protobuf_c" -- the VM looks for this symbol in our .so.
|
|
void Init_protobuf_c() {
|
|
VALUE google = rb_define_module("Google");
|
|
VALUE protobuf = rb_define_module_under(google, "Protobuf");
|
|
VALUE internal = rb_define_module_under(protobuf, "Internal");
|
|
|
|
descriptor_instancevar_interned = rb_intern(kDescriptorInstanceVar);
|
|
DescriptorPool_register(protobuf);
|
|
Descriptor_register(protobuf);
|
|
FileDescriptor_register(protobuf);
|
|
FieldDescriptor_register(protobuf);
|
|
OneofDescriptor_register(protobuf);
|
|
EnumDescriptor_register(protobuf);
|
|
MessageBuilderContext_register(internal);
|
|
OneofBuilderContext_register(internal);
|
|
EnumBuilderContext_register(internal);
|
|
FileBuilderContext_register(internal);
|
|
Builder_register(internal);
|
|
RepeatedField_register(protobuf);
|
|
Map_register(protobuf);
|
|
|
|
cError = rb_const_get(protobuf, rb_intern("Error"));
|
|
cParseError = rb_const_get(protobuf, rb_intern("ParseError"));
|
|
cTypeError = rb_const_get(protobuf, rb_intern("TypeError"));
|
|
|
|
rb_define_singleton_method(protobuf, "discard_unknown",
|
|
Google_Protobuf_discard_unknown, 1);
|
|
rb_define_singleton_method(protobuf, "deep_copy",
|
|
Google_Protobuf_deep_copy, 1);
|
|
|
|
kRubyStringUtf8Encoding = rb_utf8_encoding();
|
|
kRubyStringASCIIEncoding = rb_usascii_encoding();
|
|
kRubyString8bitEncoding = rb_ascii8bit_encoding();
|
|
|
|
rb_gc_register_address(&upb_def_to_ruby_obj_map);
|
|
upb_def_to_ruby_obj_map = rb_hash_new();
|
|
|
|
rb_gc_register_address(&cached_empty_string);
|
|
rb_gc_register_address(&cached_empty_bytes);
|
|
cached_empty_string = create_frozen_string("", 0, false);
|
|
cached_empty_bytes = create_frozen_string("", 0, true);
|
|
}
|