// Rewrite the byte codes of all of the methods of a class. // The rewriter must be called exactly once. Rewriting must happen after // verification but before the first method of the class is executed. void InstanceKlass::rewrite_class(TRAPS) { assert(is_loaded(), "must be loaded"); instanceKlassHandle this_oop(THREAD, this); // ... Rewriter::rewrite(this_oop, CHECK); this_oop->set_rewritten(); }
// Entry point for rewriting a single class: gathers the class's constant pool
// and method array and hands them to a temporary Rewriter, whose constructor
// performs the rewriting.
void Rewriter::rewrite(instanceKlassHandle klass, TRAPS) {
  ResourceMark rm(THREAD);

  ConstantPool*   pool    = klass->constants();
  Array<Method*>* methods = klass->methods();

  Rewriter rewriter(klass, pool, methods, CHECK);
}
// bcp是bytecode pointer,是某条字节码在内存中的地址;bci是bytecode index,是某条字节码相对该方法的字节码起始位置的偏移量 Rewriter::Rewriter(instanceKlassHandle klass, constantPoolHandle cpool, Array<Method*>* methods, TRAPS) : _klass(klass),_pool(cpool),_methods(methods) { assert(_pool->cache() == NULL, "constant pool cache must not be set yet"); // 第1部分:determine index maps for Method* rewriting compute_index_maps(); // 第2部分 // RegisterFinalizersAtInit命令解释:先执行new分配好对象空间,然后再执行invokespecial调用构造函数, // jvm里其实可以让用户选择在这两个时机中的任意一个将当前对象传递给Finalizer.register方法来注册到Finalizer对象链里, // 这个选择依赖于RegisterFinalizersAtInit这个vm参数是否被设置,默认值为true,也就是在调用构造函数返回之前调用 // Finalizer.register方法,如果通过-XX:-RegisterFinalizersAtInit关闭了该参数,那将在对象空间分配好之后就将这个对象注册进去。 if (RegisterFinalizersAtInit && _klass->name() == vmSymbols::java_lang_Object()) { bool did_rewrite = false; int i = _methods->length(); while (i-- > 0) { Method* method = _methods->at(i); if (method->intrinsic_id() == vmIntrinsics::_Object_init) { // rewrite the return bytecodes of Object.<init> to register the // object for finalization if needed. methodHandle m(THREAD, method); rewrite_Object_init(m, CHECK); did_rewrite = true; break; } } assert(did_rewrite, "must find Object::<init> to rewrite it"); } // 第3部分 // rewrite methods, in two passes int len = _methods->length(); bool invokespecial_error = false; for (int i = len-1; i >= 0; i--) { Method* method = _methods->at(i); scan_method(method, false, &invokespecial_error); // ... } // ... // 第4部分 // allocate constant pool cache, now that we've seen all the bytecodes make_constant_pool_cache(THREAD); // ... }
// Computes a CPC map (new_index -> original_index) for constant pool entries // that are referred to by the interpreter at runtime via the constant pool cache. // Also computes a CP map (original_index -> new_index). // Marks entries in CP which require additional processing. void Rewriter::compute_index_maps() { const int length = _pool->length(); init_maps(length); bool saw_mh_symbol = false; // 通过循环一次性将常量池中的项处理完毕 for (int i = 0; i < length; i++) { constantTag ct = _pool->tag_at(i); int tag = ct.value(); switch (tag) { case JVM_CONSTANT_InterfaceMethodref: case JVM_CONSTANT_Fieldref : // fall through case JVM_CONSTANT_Methodref : // fall through add_cp_cache_entry(i); break; case JVM_CONSTANT_String: case JVM_CONSTANT_MethodHandle : // fall through case JVM_CONSTANT_MethodType : // fall through add_resolved_references_entry(i); break; // ... } } // Record limits of resolved reference map for constant pool cache indices record_map_limits(); // ... }
// _cp_map与_cap_cache_map是一对 intArray _cp_map; // for Methodref, Fieldref,InterfaceMethodref and InvokeDynamic intStack _cp_cache_map; intArray _reference_map; // maps from cp index to resolved_refs index (or -1) intStack _resolved_references_map; // for strings, methodHandle, methodType int _resolved_reference_limit; int _first_iteration_cp_cache_limit;
void init_maps(int length) { _cp_map.initialize(length, -1); // _cp_map是整数类型数组 // Choose an initial value large enough that we don't get frequent calls to grow(). _cp_cache_map.initialize(length/2); // _cp_cache_map是整数类型栈 // Also cache resolved objects, in another different cache. _reference_map.initialize(length, -1); // _reference_map是整数类型数组 _resolved_references_map.initialize(length/2); // _resolved_references_map是整数类型的栈 _resolved_reference_limit = -1; _first_iteration_cp_cache_limit = -1; }
CONSTANT_Fieldref_info { u1 tag; u2 class_index; u2 name_and_type_index; } CONSTANT_Methodref_info { u1 tag; u2 class_index; u2 name_and_type_index; } CONSTANT_InterfaceMethodref_info { u1 tag; u2 class_index; u2 name_and_type_index; }
// Interface with a single method; calls made through it are interface calls.
interface Computable {
    void calculate();
}

// Concrete implementation of Computable; calculate() has an empty body.
class Computer implements Computable {
    public void calculate() { }
}

// Sample class: three static fields and three calls in main().
public class Test {
    // x1 and x2 both hold a Computer instance but are declared with
    // different static types (interface vs. class).
    public static Computable x1 = new Computer();
    public static Computer x2 = new Computer();
    // Initialized from the string literal "mz".
    public static String x3 = "mz";

    public static void main(String[] args) {
        x1.calculate();          // receiver is typed as the interface Computable
        x2.calculate();          // receiver is typed as the class Computer
        System.out.println(x3);
    }
}
public class com.test.Test // ... Constant pool: #1 = Class #2 // com/test/Test #2 = Utf8 com/test/Test #3 = Class #4 // java/lang/Object #4 = Utf8 java/lang/Object #5 = Utf8 x1 #6 = Utf8 Lcom/test/Computable; #7 = Utf8 x2 #8 = Utf8 Lcom/test/Computer; #9 = Utf8 x3 #10 = Utf8 Ljava/lang/String; #11 = Utf8 <clinit> #12 = Utf8 ()V #13 = Utf8 Code #14 = Class #15 // com/test/Computer #15 = Utf8 com/test/Computer #16 = Methodref #14.#17 // com/test/Computer."<init>":()V #17 = NameAndType #18:#12 // "<init>":()V #18 = Utf8 <init> #19 = Fieldref #1.#20 // com/test/Test.x1:Lcom/test/Computable; #20 = NameAndType #5:#6 // x1:Lcom/test/Computable; #21 = Fieldref #1.#22 // com/test/Test.x2:Lcom/test/Computer; #22 = NameAndType #7:#8 // x2:Lcom/test/Computer; #23 = String #24 // mz #24 = Utf8 mz #25 = Fieldref #1.#26 // com/test/Test.x3:Ljava/lang/String; #26 = NameAndType #9:#10 // x3:Ljava/lang/String; #27 = Utf8 LineNumberTable #28 = Utf8 LocalVariableTable #29 = Methodref #3.#17 // java/lang/Object."<init>":()V #30 = Utf8 this #31 = Utf8 Lcom/test/Test; #32 = Utf8 main #33 = Utf8 ([Ljava/lang/String;)V #34 = InterfaceMethodref #35.#37 // com/test/Computable.calculate:()V #35 = Class #36 // com/test/Computable #36 = Utf8 com/test/Computable #37 = NameAndType #38:#12 // calculate:()V #38 = Utf8 calculate #39 = Methodref #14.#37 // com/test/Computer.calculate:()V #40 = Fieldref #41.#43 // java/lang/System.out:Ljava/io/PrintStream; #41 = Class #42 // java/lang/System #42 = Utf8 java/lang/System #43 = NameAndType #44:#45 // out:Ljava/io/PrintStream; #44 = Utf8 out #45 = Utf8 Ljava/io/PrintStream; #46 = Methodref #47.#49 // java/io/PrintStream.println:(Ljava/lang/String;)V #47 = Class #48 // java/io/PrintStream #48 = Utf8 java/io/PrintStream #49 = NameAndType #50:#51 // println:(Ljava/lang/String;)V #50 = Utf8 println #51 = Utf8 (Ljava/lang/String;)V #52 = Utf8 args #53 = Utf8 [Ljava/lang/String; #54 = Utf8 SourceFile #55 = Utf8 Test.java { public static com.test.Computable x1; 
descriptor: Lcom/test/Computable; flags: ACC_PUBLIC, ACC_STATIC public static com.test.Computer x2; descriptor: Lcom/test/Computer; flags: ACC_PUBLIC, ACC_STATIC public static java.lang.String x3; descriptor: Ljava/lang/String; flags: ACC_PUBLIC, ACC_STATIC static {}; descriptor: ()V flags: ACC_STATIC Code: stack=2, locals=0, args_size=0 0: new #14 // class com/test/Computer 3: dup 4: invokespecial #16 // Method com/test/Computer."<init>":()V 7: putstatic #19 // Field x1:Lcom/test/Computable; 10: new #14 // class com/test/Computer 13: dup 14: invokespecial #16 // Method com/test/Computer."<init>":()V 17: putstatic #21 // Field x2:Lcom/test/Computer; 20: ldc #23 // String mz 22: putstatic #25 // Field x3:Ljava/lang/String; 25: return public com.test.Test(); descriptor: ()V flags: ACC_PUBLIC Code: stack=1, locals=1, args_size=1 0: aload_0 1: invokespecial #29 // Method java/lang/Object."<init>":()V 4: return public static void main(java.lang.String[]); descriptor: ([Ljava/lang/String;)V flags: ACC_PUBLIC, ACC_STATIC Code: stack=2, locals=1, args_size=1 0: getstatic #19 // Field x1:Lcom/test/Computable; 3: invokeinterface #34, 1 // InterfaceMethod com/test/Computable.calculate:()V 8: getstatic #21 // Field x2:Lcom/test/Computer; 11: invokevirtual #39 // Method com/test/Computer.calculate:()V 14: getstatic #40 // Field java/lang/System.out:Ljava/io/PrintStream; 17: getstatic #25 // Field x3:Ljava/lang/String; 20: invokevirtual #46 // Method java/io/PrintStream.println:(Ljava/lang/String;)V 23: return }
对于这3个常量池项来说,在Rewriter::compute_index_maps()方法中调用add_cp_cache_entry()方法进行处理。其实该方法还会处理JVM_CONSTANT_MethodHandle与JVM_CONSTANT_MethodType,这2个常量池项是为了让Java语言支持动态语言特性而在Java 7版本中新增的常量池项,只有在极其特别的情况下才会用到它们,在Class文件中几乎不会生成这2个常量池项,暂时不介绍。
int add_cp_cache_entry(int cp_index) { int cache_index = add_map_entry(cp_index, &_cp_map, &_cp_cache_map); return cache_index; } int add_map_entry(int cp_index, intArray* cp_map, intStack* cp_cache_map) { int cache_index = cp_cache_map->append(cp_index); // cp_cache_map是整数类型的栈 cp_map->at_put(cp_index, cache_index); //cp_map是整数类型的数组 return cache_index; }
通过cp_cache_map和cp_map建立了cp_index与cache_index的对应关系,这在后面会有重要应用。
// add a new entry to the resolved_references map int add_resolved_references_entry(int cp_index) { int ref_index = add_map_entry(cp_index, &_reference_map, &_resolved_references_map); assert(cp_entry_to_resolved_references(cp_index) == ref_index, ""); return ref_index; } int add_map_entry(int cp_index, intArray* cp_map, intStack* cp_cache_map) { assert(cp_map->at(cp_index) == -1, "not twice on same cp_index"); int cache_index = cp_cache_map->append(cp_index); cp_map->at_put(cp_index, cache_index); return cache_index; }
void record_map_limits() { // Record initial size of the two arrays generated for the CP cache // relative to walking the constant pool. _first_iteration_cp_cache_limit = _cp_cache_map.length(); // _cp_cache_map是整数类型的栈 _resolved_reference_limit = _resolved_references_map.length(); // _resolved_references_map是整数类型的数组 }
// The new finalization semantics says that registration of // finalizable objects must be performed on successful return from the // Object.<init> constructor. We could implement this trivially if // <init> were never rewritten but since JVMTI allows this to occur, a // more complicated solution is required. A special return bytecode // is used only by Object.<init> to signal the finalization // registration point. Additionally local 0 must be preserved so it's // available to pass to the registration function. For simplicty we // require that local 0 is never overwritten so it's available as an // argument for registration. void Rewriter::rewrite_Object_init(methodHandle method, TRAPS) { RawBytecodeStream bcs(method); while (!bcs.is_last_bytecode()) { Bytecodes::Code opcode = bcs.raw_next(); switch (opcode) { case Bytecodes::_return: *bcs.bcp() = Bytecodes::_return_register_finalizer; break; // ... } } }
有些字节码指令的操作数在Class文件里跟在运行时看起来不同,因为HotSpot在连接类的时候会对字节码进行重写,把某些指令的操作数从常量池下标(就是之前接触到的cp_index)改写为常量池缓存下标(就是之前介绍的cache_index)。因为这些指令所需要引用的信息无法使用一个constant pool entry slot来表示,需要使用一个更大的数据结构表示常量池项的内容。在Rewriter::scan_method()中就对部分字节码进行了重写,如下:
// Rewrites a method given the index_map information
//
// Walks the method's bytecodes once and dispatches on each opcode.
//   method              - the method whose bytecodes are scanned and patched in place
//   reverse             - forwarded to the rewrite helpers; false rewrites forward,
//                         true is the direction in which the already-rewritten
//                         opcodes (see the "if reverse=true" cases) are expected
//   invokespecial_error - out flag forwarded to rewrite_invokespecial()
void Rewriter::scan_method(Method* method, bool reverse, bool* invokespecial_error) {
  int nof_jsrs = 0;
  bool has_monitor_bytecodes = false;

  //////////////////////////////////////////////////////////////////////
  {
    // We cannot tolerate a GC in this block, because we've
    // cached the bytecodes in 'code_base'. If the Method*
    // moves, the bytecodes will also move.
    No_Safepoint_Verifier nsv;
    Bytecodes::Code c;

    // Bytecodes and their length
    const address code_base = method->code_base();
    const int code_length = method->code_size();

    int bc_length;
    // bci advances by the length of the bytecode that was just examined.
    for (int bci = 0; bci < code_length; bci += bc_length) {
      address bcp = code_base + bci;
      int prefix_length = 0;
      c = (Bytecodes::Code)(*bcp);

      // Since we have the code, see if we can get the length
      // directly. Some more complicated bytecodes will report
      // a length of zero, meaning we need to make another method
      // call to calculate the length.
      bc_length = Bytecodes::length_for(c);
      if (bc_length == 0) {
        bc_length = Bytecodes::length_at(method, bcp);

        // length_at will put us at the bytecode after the one modified
        // by 'wide'. We don't currently examine any of the bytecodes
        // modified by wide, but in case we do in the future...
        if (c == Bytecodes::_wide) {
          prefix_length = 1;
          c = (Bytecodes::Code)bcp[1];
        }
      }

      switch (c) {
        // Forward direction: replace lookupswitch by one of the two fast
        // variants, chosen by the number of pairs.
        case Bytecodes::_lookupswitch : {
#ifndef CC_INTERP
          Bytecode_lookupswitch bc(method, bcp);
          (*bcp) = (
            bc.number_of_pairs() < BinarySwitchThreshold
            ? Bytecodes::_fast_linearswitch
            : Bytecodes::_fast_binaryswitch
          );
#endif
          break;
        }
        // Opposite direction: restore the original lookupswitch opcode.
        case Bytecodes::_fast_linearswitch:
        case Bytecodes::_fast_binaryswitch: {
#ifndef CC_INTERP
          (*bcp) = Bytecodes::_lookupswitch;
#endif
          break;
        }
        // The operand starts one byte after the opcode (plus any wide prefix).
        case Bytecodes::_invokespecial  : {
          rewrite_invokespecial(bcp, prefix_length+1, reverse, invokespecial_error);
          break;
        }
        case Bytecodes::_getstatic      : // fall through
        case Bytecodes::_putstatic      : // fall through
        case Bytecodes::_getfield       : // fall through
        case Bytecodes::_putfield       : // fall through
        case Bytecodes::_invokevirtual  : // fall through
        case Bytecodes::_invokestatic   :
        case Bytecodes::_invokeinterface:
        case Bytecodes::_invokehandle   : // if reverse=true
          rewrite_member_reference(bcp, prefix_length+1, reverse);
          break;
        // ...
        case Bytecodes::_ldc:
        case Bytecodes::_fast_aldc:  // if reverse=true
          maybe_rewrite_ldc(bcp, prefix_length+1, false, reverse);
          break;
        case Bytecodes::_ldc_w:
        case Bytecodes::_fast_aldc_w:  // if reverse=true
          maybe_rewrite_ldc(bcp, prefix_length+1, true, reverse);
          break;
        // ...
        case Bytecodes::_monitorenter : // fall through
        case Bytecodes::_monitorexit  :
          has_monitor_bytecodes = true;
          break;
      }
    }
  }
  //////////////////////////////////////////////////////////////////////

  // Update access flags
  if (has_monitor_bytecodes) {
    method->set_has_monitor_bytecodes();
  }
  // ...
}
// invokerspecial是作为对private和构造方法的调用,绕过了virtual dispatch; // If the constant pool entry for invokespecial is InterfaceMethodref, // we need to add a separate cpCache entry for its resolution, because it is // different than the resolution for invokeinterface with InterfaceMethodref. // These cannot share cpCache entries. It's unclear(不确定的) if all invokespecial to // InterfaceMethodrefs would resolve to the same thing so a new cpCache entry // is created for each one. This was added with lambda. void Rewriter::rewrite_invokespecial(address bcp, int offset, bool reverse, bool* invokespecial_error) { address p = bcp + offset; if (!reverse) { // 获取常量池中要调用方法的索引 int cp_index = Bytes::get_Java_u2(p); if (_pool->tag_at(cp_index).is_interface_method()) { int cache_index = add_invokespecial_cp_cache_entry(cp_index); if (cache_index != (int)(jushort) cache_index) { *invokespecial_error = true; } Bytes::put_native_u2(p, cache_index); } else { rewrite_member_reference(bcp, offset, reverse); } } // ... }
// add a new CP cache entry beyond the normal cache for the special case of // invokespecial with InterfaceMethodref as cpool operand. int add_invokespecial_cp_cache_entry(int cp_index) { assert(_first_iteration_cp_cache_limit >= 0, "add these special cache entries after first iteration"); // Don't add InterfaceMethodref if it already exists at the end. for (int i = _first_iteration_cp_cache_limit; i < _cp_cache_map.length(); i++) { if (cp_cache_entry_pool_index(i) == cp_index) { return i; } } int cache_index = _cp_cache_map.append(cp_index); assert(cache_index >= _first_iteration_cp_cache_limit, ""); // do not update _cp_map, since the mapping is one-to-many assert(cp_cache_entry_pool_index(cache_index) == cp_index, ""); return cache_index; }
// Rewrite a classfile-order CP index into a native-order CPC index. void Rewriter::rewrite_member_reference(address bcp, int offset, bool reverse) { address p = bcp + offset; if (!reverse) { int cp_index = Bytes::get_Java_u2(p); int cache_index = cp_entry_to_cp_cache(cp_index); Bytes::put_native_u2(p, cache_index); } // ... }
// Looks up the constant pool cache index recorded in _cp_map for constant
// pool entry i. The entry must have a cache entry.
int cp_entry_to_cp_cache(int i) {
  assert(has_cp_cache(i), "oob");
  const int cache_index = _cp_map[i];
  return cache_index;
}
// ldc指令从常量池中取值然后压入栈中 // Rewrite some ldc bytecodes to _fast_aldc void Rewriter::maybe_rewrite_ldc(address bcp, int offset, bool is_wide,bool reverse) { if (!reverse) { assert((*bcp) == (is_wide ? Bytecodes::_ldc_w : Bytecodes::_ldc), "not ldc bytecode"); address p = bcp + offset; int cp_index = is_wide ? Bytes::get_Java_u2(p) : (u1)(*p); constantTag tag = _pool->tag_at(cp_index).value(); if (tag.is_method_handle() || tag.is_method_type() || tag.is_string()) { int ref_index = cp_entry_to_resolved_references(cp_index); if (is_wide) { (*bcp) = Bytecodes::_fast_aldc_w; assert(ref_index == (u2)ref_index, "index overflow"); Bytes::put_native_u2(p, ref_index); } else { (*bcp) = Bytecodes::_fast_aldc; assert(ref_index == (u1)ref_index, "index overflow"); (*p) = (u1)ref_index; } } } // ... }
// Looks up the resolved_references index recorded in _reference_map for
// constant pool entry cp_index. The entry must have been added to the
// resolved references.
int cp_entry_to_resolved_references(int cp_index) const {
  assert(has_entry_in_resolved_references(cp_index), "oob");
  const int ref_index = _reference_map[cp_index];
  return ref_index;
}
1、在Ubuntu 16.04上编译OpenJDK8的源代码
作者持续维护的个人博客 classloading.com。