// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o1_jit/cg_prepass.cpp,v 1.2 2001/08/13 09:59:51 xhshi Exp $
//

#include "defines.h"
#include "jit_intf.h"
#include <iostream.h>
#include <assert.h>
#include "cg_prepass.h"
#include "bit_vector.h"
#include "regalloc.h"

#include "gc_tags.h"

// Cloning of variables to eliminate GC tags.
#ifdef VAR_CLONING
#include "code_emitter.h"
#include "stack.h"
#include "lazy_code_selector.h"
#include "cg_method_invocation.h"
#endif //VAR_CLONING

#include "register_allocator.h"
#include "profiling.h"
#include "jvmdi_clean.h"
#include "jit_common.h"
#include "internal_jit_intf.h"

// Tells whether a Java_Type denotes a reference.
// Returns 1 for JAVA_TYPE_ARRAY, JAVA_TYPE_STRING and JAVA_TYPE_CLASS,
// and 0 for every other type.
int is_ref_type(Java_Type type) {
	return (type == JAVA_TYPE_ARRAY ||
	        type == JAVA_TYPE_STRING ||
	        type == JAVA_TYPE_CLASS) ? 1 : 0;
}

// Fetches the type of constant-pool entry CPindex and, for non-string
// constants, copies its value into the caller's buffer.
//   classHandle   : class whose constant pool is consulted
//   CPindex       : constant-pool index
//   valueBuffAddr : receives one 32-bit word, or two words when the constant
//                   is JAVA_TYPE_DOUBLE or JAVA_TYPE_LONG; left untouched for
//                   JAVA_TYPE_STRING
// Returns the constant's Java_Type.
static Java_Type getConstantValue (
                    Class_Handle  classHandle,    /* IN  */
                    unsigned      CPindex,        /* IN  */
                    void *        valueBuffAddr)  /* OUT */
{
    Java_Type const_type = class_get_const_type(classHandle, CPindex);

    // String constants carry no inline value: report the type only.
    if (const_type == JAVA_TYPE_STRING)
        return JAVA_TYPE_STRING;

    const uint32 *src_words = (const uint32 *)class_get_const_addr(classHandle, CPindex);
    uint32 *dst_words = (uint32 *)valueBuffAddr;

    // First (or only) 32-bit word.
    dst_words[0] = src_words[0];

    // Doubles and longs occupy a second 32-bit word.
    if (const_type == JAVA_TYPE_DOUBLE || const_type == JAVA_TYPE_LONG)
        dst_words[1] = src_words[1];

    return const_type;
}

// Builds a bit vector describing which operand-stack slots hold references
// at a garbage-collectable site.
//   mm          : memory manager the vector is allocated from
//   ref_stack   : per-slot flags; a value of 1 marks a reference
//   stack_depth : number of slots to record (also the vector's size)
// Returns a new Bit_Vector with bit i set for each i < stack_depth where
// ref_stack[i] == 1. (Assumes a freshly constructed Bit_Vector starts
// cleared -- the constructor elsewhere in this file notes "auto cleared".)
Bit_Vector *create_and_set_ref_stack_bv(Mem_Manager& mm, 
                                        char *ref_stack,
                                        unsigned stack_depth) {
	Bit_Vector *ref_bits = new (mm) Bit_Vector(stack_depth,mm);
	unsigned slot = 0;
	while (slot < stack_depth) {
		if (ref_stack[slot] == 1) {
			ref_bits->set(slot);
		}
		++slot;
	}
	return ref_bits;
}

// Runs the prepass over one method's bytecode.
//
//   maxLocals   : number of local variable slots
//   first       : pointer to the first bytecode
//   length      : length of the bytecode array in bytes
//   ch, mh      : class / method handles of the method being compiled
//   comp        : compile handle (stored in comp_handle)
//   mem_manager : source of every allocation made here
//   maxStk      : maximum operand stack depth
//   regalloc    : asked whether a sorted table of GC sites is needed
//
// The constructor allocates the per-bytecode info and bookkeeping arrays,
// walks the code from the method entry (find_labels), then walks every
// exception handler, and finally adjusts num_call_sites for synchronized
// methods and JVMDI support. If find_labels sets prepass_failed the
// constructor returns early; callers must check that flag.
CG_Prepass::CG_Prepass(unsigned maxLocals,const unsigned char *first,
					   unsigned length,Class_Handle ch,Method_Handle mh,
                       Compile_Handle comp,
					   Mem_Manager& mem_manager,unsigned maxStk,
                       Register_Allocator *regalloc)
	: first_bc(first), code_length(length),class_handle(ch),comp_handle(comp),
      num_call_sites(0), num_blocks(0), num_edges(0), num_entries_back_edge(0),
      maxStack(maxStk),_mm(mem_manager), prepass_failed(0), num_returns(0),
      num_get_put_static(0), recomp_entries(NULL) {

	// sorted_gc_sites is not part of the initializer list. Give it a defined
	// value up front so that it is never left uninitialized when the prepass
	// fails (early returns below) or when the register allocator does not
	// ask for sorted GC sites.
	sorted_gc_sites = NULL;

#ifdef VAR_CLONING
	unsigned num_args = n_words_of_method_arg_type(mh);
	var_cloning_failed = false;
	local_var_info = (char*)mem_manager.alloc(maxLocals);
	{
		for (unsigned j=0; j < maxLocals; j++) {
			local_var_info[j] = 0;
		}
	}
#endif //VAR_CLONING

	// no local variable has been aloaded yet
	n_aloaded = 0;
	aloaded_vars = (char*)mem_manager.alloc(maxLocals);
	for (unsigned j=0; j < maxLocals; j++) {
		aloaded_vars[j] = 0;
	}

	_ro_data_size = _rw_data_size = 0;

	_bytecode_info = (Bytecode_Info*)mem_manager.alloc(code_length * sizeof(Bytecode_Info));
	is_visited = new(mem_manager) Bit_Vector(code_length,mem_manager);	// auto cleared
	ref_stack = (char*)mem_manager.alloc(maxStack * sizeof(char));
	gc_site_vectors = new (mem_manager) Bit_Vector_List();
	finally_bc_list = NULL;

	// one counter per local plus GC_Tags_n_words(maxLocals) extra entries
	unsigned n_ref_count = maxLocals + GC_Tags_n_words(maxLocals);
	ref_count = (unsigned*)mem_manager.alloc(n_ref_count* sizeof(unsigned));
	for (unsigned k=0; k < n_ref_count; k++) {
		ref_count[k] = 0;
	}

	// clear the per-bytecode info
	for (unsigned i = 0; i < code_length; i++)
		_bytecode_info[i].init = 0;

    //
    // statistics code doesn't need to deal with GC support because no
    // calls are introduced for it
    //
    if (instrumenting && !statistics && !recompilation_thread) {
	    // stack depth 0: the recorded bit vector is empty
	    Bit_Vector *bv = create_and_set_ref_stack_bv(mem_manager, ref_stack, 0);
        recomp_entries = new (mem_manager) Recomp_Entry(bv,-1,recomp_entries);
        num_call_sites++;
    }

	// perform prepass (from first_bc and all exception handlers)
	find_labels(first_bc,0);

    if (prepass_failed)
        return;
	unsigned cEH = method_get_num_handlers(mh);
	for(unsigned e=0; e<cEH; e++) {
		unsigned tryBegOffsPtr, tryEndOffsPtr, handlerOffsPtr, handlerTypePtr;
		method_get_handler_info(mh,e, &tryBegOffsPtr, &tryEndOffsPtr,
			&handlerOffsPtr, &handlerTypePtr);
		if (_bytecode_info[tryBegOffsPtr].attr.is_try_start == 0) {
			// first time we see this try-catch-finally block
			num_blocks++;		// try-catch-finally metablock
			if (_bytecode_info[tryBegOffsPtr].attr.is_block_entry == 0) {
				// force a new basic block at the beginning of a try-catch-finally block
				_bytecode_info[tryBegOffsPtr].attr.is_block_entry = 1;
				num_blocks++;
				// count edges into and out of tcfb
				num_edges += 2;
			}

			// mark start and end of try block. this depends on the fact that catch-all blocks
			// come after regular catch blocks in the exception table.
			_bytecode_info[tryBegOffsPtr].attr.is_try_start = 1;
			_bytecode_info[tryEndOffsPtr].attr.is_try_end = 1;
		}
        //
        // the end of the try block ends a basic block as well
        //
        unsigned instAfterEnd = tryEndOffsPtr + instruction_length(first_bc,tryEndOffsPtr);
        if (instAfterEnd < code_length &&
		    _bytecode_info[instAfterEnd].attr.is_block_entry == 0) {
				// force a new basic block at the instruction following the try block
				_bytecode_info[instAfterEnd].attr.is_block_entry = 1;
				num_blocks++;
				// count edges into and out of the new block
				num_edges += 2;
        }


#if 1 // JMS
        // Every exception handler must be a GC-safe point.
        store_gc_site_info(0, first_bc + handlerOffsPtr);
        num_call_sites++;
#endif
		ref_stack[0] = 1;	// mark exception object placed on stack by VM
		// pass 1 as stack_depth because the stack contains only the exception object
		find_labels(first_bc + handlerOffsPtr,1);
        if (prepass_failed)
            return;
        Bytecode_Info *byte_info = &_bytecode_info[handlerOffsPtr];
        byte_info->attr.is_exception_handler_entry = 1;
	}

	// if this is a synchronized method, add call sites for the monitorenter/exit
	DWORD method_flags = method_get_flags(mh);
	if (method_flags & ACC_SYNCHRONIZED) {
		// must increment once for monitorenter, and num_returns for monitorexits
		num_call_sites += num_returns + 1;
	}

    if (jvmdi_support) {
        // 
        // We will insert calls to track JVMDI_EVENT_METHOD_ENTRY and
        // JVMDI_EVENT_METHOD_EXIT so we add one for the entry and num_returns
        // for the exits.
        //
        num_call_sites += num_returns + 1;
    }

    if (regalloc->need_sorted_gc_sites())
    {
        // sorted_gc_sites is already NULL here; build the table only when
        // there is at least one call site
        if (num_call_sites > 0) {
            unsigned size = num_call_sites * sizeof(Call_BV_List_Element*);
            sorted_gc_sites = (Call_BV_List_Element**)mem_manager.alloc(size);
            sort_gc_sites_info();
        }
    }
}

// Empty destructor: nothing is released here. The constructor obtains all of
// its storage through the Mem_Manager it is given (mem_manager.alloc and
// placement new), so presumably that manager reclaims it -- TODO confirm.
CG_Prepass::~CG_Prepass() {}

// Estimates, in bytes, how much memory a prepass will need.
//   byteCodeSize : length of the method's bytecode
//   maxStk       : maximum operand stack depth
//   maxLcls      : number of local variable slots (only counted when
//                  VAR_CLONING is defined)
// Returns the sum of the individual estimates below.
unsigned CG_Prepass::estimate_mem_size(unsigned byteCodeSize,
									   unsigned maxStk,
									   unsigned maxLcls) {
	// one Bytecode_Info record per bytecode byte
	const unsigned info_bytes = byteCodeSize * sizeof(Bytecode_Info);
	// one bit vector sized by the bytecode length
	const unsigned code_bv_bytes = Bit_Vector::mem_size(byteCodeSize);
	// stack-sized bit vectors: assume 10 call sites on average
	const unsigned site_bv_bytes = Bit_Vector::mem_size(maxStk) * 10;
	// one unsigned per stack slot
	const unsigned stack_bytes = (maxStk) * sizeof(unsigned);

	unsigned total = info_bytes;
	total += code_bv_bytes;
	total += site_bv_bytes;
	total += stack_bytes;
#ifdef VAR_CLONING
	total += maxLcls;
#endif //VAR_CLONING
	total += byteCodeSize + 16; // MOVE_RUNTIME_THROWS
	return total;
}

void CG_Prepass::store_gc_site_info(unsigned stack_depth, const unsigned char *bc)
{
	Bit_Vector *bv = create_and_set_ref_stack_bv(_mm, ref_stack, stack_depth);
	Call_BV_List_Element *bvle = new (_mm) Call_BV_List_Element(bv,bc);
	gc_site_vectors->push(bvle);
}
// Partition step of the quick sort over A[p..r], ordering elements by id.
// The pivot is the element found at the middle index on entry. Two cursors
// move towards each other, swapping out-of-place elements, until they meet.
// Returns the index j at which they met; the caller then sorts A[p..j] and
// A[j+1..r] separately.
static int partition(Call_BV_List_Element *A[], int p, int r) {
	Call_BV_List_Element *pivot = A[(p+r)>>1];
	int left = p - 1;
	int right = r + 1;
	for (;;) {
		// step right cursor down to an element not greater than the pivot
		--right;
		while (A[right]->id > pivot->id)
			--right;
		// step left cursor up to an element not smaller than the pivot
		++left;
		while (A[left]->id < pivot->id)
			++left;
		if (left >= right)
			return right;
		Call_BV_List_Element *held = A[left];
		A[left] = A[right];
		A[right] = held;
	}
}
// Recursive quick sort of A[p..r] (inclusive bounds) by element id.
// Ranges with fewer than two elements (p >= r) are left untouched.
static void qsort(Call_BV_List_Element *A[], int p, int r) {
	if (p >= r)
		return;
	const int split = partition(A,p,r);
	qsort(A,p,split);
	qsort(A,split+1,r);
}
void CG_Prepass::sort_gc_sites_info() {
	if (num_call_sites == 0) return;
	//
	// initialization before sorting
	//
	unsigned i = 0;
	for (Call_BV_List_Element *bvle = (Call_BV_List_Element*)gc_site_vectors->front;
		 bvle != NULL; bvle = (Call_BV_List_Element*)bvle->next)
		sorted_gc_sites[i++] = bvle;
	assert(i <= num_call_sites);
	//
	// quick sort
	//
	qsort(sorted_gc_sites,0,i-1);
	for (;i < num_call_sites; i++)
		sorted_gc_sites[i] = NULL;
}
//
// use binary search to find the corresponding gc info for call_bc
//
Call_BV_List_Element *CG_Prepass::find_gc_sites_info(const unsigned char *call_bc,
														unsigned& hint) {
	// Looks up the GC-site record whose id equals call_bc in sorted_gc_sites.
	//   call_bc : bytecode address used as the lookup key (compared to bvle->id)
	//   hint    : in/out probe position. Slots hint and hint+1 are tried
	//             before the binary search; on a hit, hint is moved to the
	//             slot after the match, on a miss it is left unchanged.
	// Returns the matching element, or NULL when there is none.
#if (!(REG_ALLOC_METHOD == REG_ALLOC_SIMPLE || REG_ALLOC_METHOD == REG_ALLOC_CHOW || LOCAL_CALLEE))
    return NULL;
#endif // REG_ALLOC_METHOD
	// no table to search
	if (sorted_gc_sites == NULL)
		return NULL;

	// check hint first to see if it is what we are looking for.  If not, then 
	// use binary search to find gc info for call_bc
	Call_BV_List_Element *bvle;
	if (hint < num_call_sites) {
		bvle = sorted_gc_sites[hint];
		if (bvle != NULL && bvle->id == call_bc) {
			hint++; // next time call the routine, we start from hint
			return bvle;
		}
        if (hint+1 < num_call_sites)
        {
            bvle = sorted_gc_sites[hint+1];
            if (bvle != NULL && bvle->id == call_bc) {
                hint+=2; // next time call the routine, we start from hint
                return bvle;
            }
        }
	}

	// Binary search over [low, up]. All indices are signed so that
	// "up = mid - 1" with mid == 0 yields -1 and ends the loop, rather than
	// wrapping around as it would with an unsigned mid.
	int low = 0, up = (int)num_call_sites - 1;
	while (low <= up) {
		int mid = low + (up - low) / 2;
		bvle = sorted_gc_sites[mid];
		// NULL slots are treated as larger than any key (the sort pads the
		// tail of the table with NULL)
		if (bvle == NULL || bvle->id > call_bc) 
			up = mid - 1;
		else if (bvle->id == call_bc) {
			hint = mid+1; // next time call the routine. Hopefully we are looking for hint+1
			return bvle;
		} else  // bvle->id <  call_bc
			low = mid + 1;
	}
	return NULL;
}

void CG_Prepass::find_labels(const unsigned char *bc,
							 unsigned stack_depth) {
	unsigned bc_offset = bc - first_bc;
	Bytecode_Info *byte_info = &_bytecode_info[bc_offset];
	//
	// count basic blocks and cfg edges
	//
	if (!byte_info->attr.is_block_entry) {
		//
		// even if this bytecode has been visited, if it is not marked
		// as a block entry, then it is a new basic block.
		//
		num_blocks++;
		if (is_visited->is_set(bc_offset)) {
			// this is the case where we have jumped into the middle
			// of what we thought was a single block. instead, there
			// are two blocks, the first of which has only the second
			// as a successor. add the edge between these two "newly"
			// discovered blocks
			num_edges++;
		}
	}
	//
	// is a label as well as block entry
	// 
	byte_info->attr.is_label = byte_info->attr.is_block_entry = 1;
	//
	// check if the current bb has been visited
	//
	if (is_visited->is_set(bc_offset))
		return;
		
	int offset;		// for branches: offset of branch
	unsigned index; // index of constant pools or variables

	//
	// traverse code within the current bb
	//
	const unsigned char *last_bc = first_bc + code_length;
	while (!is_visited->is_set(bc_offset) && bc < last_bc) {

		const unsigned char *curr_bc = bc;	// ptr to current bytecode
		is_visited->set(bc_offset);
		//
		// set the stack depth for each bytecode
		//
		byte_info->attr.depth = stack_depth; 
		unsigned char bytecode = *bc++;			// value of current bytecode
        char *old_ref_stack;
		switch (bytecode) {
		case 0x00:	// nop
			break;
		//
		// constant loads
		//
		case 0x01:	// aconst_null
			ref_stack[stack_depth++] = 1;
			break;
		case 0x02: case 0x03: case 0x04: case 0x05:	
		case 0x06: case 0x07: case 0x08:
			// iconst -1,0,...,5
			ref_stack[stack_depth++] = 0;
			break;
		case 0x09: case 0x0a: 
			// lconst 0,1
			// push the higher 32 bits which are zero
			// push the lower 32 bits
			ref_stack[stack_depth++] = 0;
			ref_stack[stack_depth++] = 0;
			break;
		case 0x0b:	// fconst 0.0F
		case 0x0c:	// fconst 1.0F
		case 0x0d:	// fconst 2.0F
			// push float const onto the stack
			ref_stack[stack_depth++] = 0;
			// requires 4 bytes in the ro data block
			_ro_data_size += 4;
			break;
		case 0x0e:	// dconst 0.0
		case 0x0f:	// dconst 1.0
			// push the higher 32 bits 
			// push the lower  32 bits
			ref_stack[stack_depth++] = 0;
			ref_stack[stack_depth++] = 0;
			// requires 8 bytes in the ro data block
			_ro_data_size += 8;
			break;
		//
		// stack pushes
		//
		case 0x10: 		// bipush
			// stack:  ... ==> ..., value
			ref_stack[stack_depth++] = 0;
			bc++;
			break;
		case 0x11:		// sipush
			// stack:  ... ==> ..., value
			ref_stack[stack_depth++] = 0;
			bc += 2;
			break;
		case 0x12:						// ldc
			{
			//
			// load constant from constant pool
			// stack:  ... ==> ..., value
			//
			index = *bc;
			double	val;
			Java_Type ct = getConstantValue(class_handle,index,&val);
			if (is_ref_type(ct)) {
				store_gc_site_info(stack_depth, bc - 1);
				ref_stack[stack_depth++] = 1;
				num_call_sites++;
			} else {
				ref_stack[stack_depth++] = 0;
			}
			_ro_data_size += num_words_of_type(ct) << 2;
			bc++;
			}
			break;
		case 0x13:			// ldc_w
			{
			// stack:  ... ==> ..., value
			index = (*bc << 8) + bc[1];
			double	val;
			Java_Type ct = getConstantValue(class_handle,index,&val);
			if (is_ref_type(ct)) {
				store_gc_site_info(stack_depth, bc - 1);
				ref_stack[stack_depth++] = 1;
				num_call_sites++;
			} else {
				ref_stack[stack_depth++] = 0;
			}
			_ro_data_size += num_words_of_type(ct) << 2;
			bc += 2;
			}
			break;
		case 0x14:			// ldc2_w
			// stack: ... ==> ..., value.word1, value.word2
			index = (*bc << 8) + bc[1];
			ref_stack[stack_depth++] = 0;
			ref_stack[stack_depth++] = 0;
			_ro_data_size += 8;
			bc += 2;
			break;
		case 0x15:	// iload
		case 0x17:	// fload
			// stack:  ... ==> ..., value
			ref_stack[stack_depth++] = 0;
			if (bytecode == 0x15) ref_count[*bc]++;
#ifdef VAR_CLONING
			{
				unsigned var_idx = (unsigned)*bc;
				unsigned lvi_int = local_var_info[var_idx];
				Local_Var_Info &lvi = *((Local_Var_Info*)(&lvi_int));
				lvi.non_ref_load = 1;
				local_var_info[var_idx] = lvi_int;
			}
#endif //VAR_CLONING
			bc++;
			break;
		case 0x19:	// aload
			// stack:  ... ==> ..., value
			// this variable is aloaded
			if (aloaded_vars[*bc] == 0)
				n_aloaded++;
			aloaded_vars[*bc] = 1;
			ref_stack[stack_depth++] = 1;
			ref_count[*bc]++;
#ifdef VAR_CLONING
			{
				unsigned var_idx = (unsigned)*bc;
				unsigned lvi_int = local_var_info[var_idx];
				Local_Var_Info &lvi = *((Local_Var_Info*)(&lvi_int));
				lvi.ref_load = 1;
				local_var_info[var_idx] = lvi_int;
			}
#endif //VAR_CLONING
			bc++;
			break;
		case 0x16:	// lload
		case 0x18:	// dload
			// push higher 32 bits
			// push lower  32 bits
			// stack:  ... ==> ..., value.w1, value.w2
			ref_stack[stack_depth++] = 0;
			ref_stack[stack_depth++] = 0;
#ifdef VAR_CLONING
			{
				unsigned var_idx = (unsigned)*bc;
				unsigned lvi_int_hi = local_var_info[var_idx];
				Local_Var_Info &lvi_hi = *((Local_Var_Info*)(&lvi_int_hi));
				lvi_hi.non_ref_load = 1;
				local_var_info[var_idx] = lvi_int_hi;
				unsigned lvi_int_lo = local_var_info[var_idx+1];
				Local_Var_Info &lvi_lo = *((Local_Var_Info*)(&lvi_int_lo));
				lvi_lo.non_ref_load = 1;
				local_var_info[var_idx+1] = lvi_int_lo;
			}
#endif //VAR_CLONING
			bc++;
			break;
		case 0x1a: case 0x1b: case 0x1c: case 0x1d:	// iload_{0,1,2,3}
		case 0x22: case 0x23: case 0x24: case 0x25:	// fload_{0,1,2,3}
			// stack:  ... ==> ..., value
			ref_stack[stack_depth++] = 0;
			if (bytecode <= 0x1d) {
				int index = (bytecode-0x1a)&0x03;
				ref_count[index]++;
			}
#ifdef VAR_CLONING
			{
				unsigned var_idx = (bytecode-0x1a)&0x03;
				unsigned lvi_int = local_var_info[var_idx];
				Local_Var_Info &lvi = *((Local_Var_Info*)(&lvi_int));
				lvi.non_ref_load = 1;
				local_var_info[var_idx] = lvi_int;
			}
#endif //VAR_CLONING
			break;
		case 0x2a: case 0x2b: case 0x2c: case 0x2d:	// aload_{0,1,2,3}
			// stack:  ... ==> ..., value
			if (aloaded_vars[bytecode - 0x2a] == 0)
				n_aloaded++;
			aloaded_vars[bytecode - 0x2a] = 1;
			ref_stack[stack_depth++] = 1;
			ref_count[bytecode - 0x2a]++;
#ifdef VAR_CLONING
			{
				unsigned var_idx = bytecode - 0x2a;
				unsigned lvi_int = local_var_info[var_idx];
				Local_Var_Info &lvi = *((Local_Var_Info*)(&lvi_int));
				lvi.ref_load = 1;
				local_var_info[var_idx] = lvi_int;
			}
#endif //VAR_CLONING
			break;
		case 0x1e: case 0x1f: case 0x20: case 0x21:	// lload_{0,1,2,3}
		case 0x26: case 0x27: case 0x28: case 0x29:	// dload_{0,1,2,3}
			// stack:  ... ==> ..., value.w1, value.w2
			ref_stack[stack_depth++] = 0;
			ref_stack[stack_depth++] = 0;
#ifdef VAR_CLONING
			{
				unsigned var_idx = (bytecode-0x1e)&0x03;
				unsigned lvi_int_hi = local_var_info[var_idx];
				Local_Var_Info &lvi_hi = *((Local_Var_Info*)(&lvi_int_hi));
				lvi_hi.non_ref_load = 1;
				local_var_info[var_idx] = lvi_int_hi;
				unsigned lvi_int_lo = local_var_info[var_idx+1];
				Local_Var_Info &lvi_lo = *((Local_Var_Info*)(&lvi_int_lo));
				lvi_lo.non_ref_load = 1;
				local_var_info[var_idx+1] = lvi_int_lo;
			}
#endif //VAR_CLONING
			break;
			//
			// array load
			//
		case 0x32:	// aaload
			// stack:  ...,arrayref,index ==> ..., value
			assert(ref_stack[stack_depth - 2] == 1);
			assert(ref_stack[stack_depth - 1] == 0);
			stack_depth--;
			//
			// store info after array access args are popped. in no cases do we
			// want them to be enumerated. and, if garbage collection does
			// happen, the access was out of range, so there is no object on the
			// stack. don't enumerate one. don't worry about arrayref, because
			// if the range check fails, then it will be discarded.
			//
			store_gc_site_info(stack_depth-1, bc - 1);
			num_call_sites++;
			break;
		case 0x2e:	// iaload
		case 0x30:	// faload
		case 0x33:	// baload
		case 0x34:	// caload
		case 0x35:	// saload
			// stack:  ...,arrayref,index ==> ..., value
			assert(ref_stack[stack_depth - 2] == 1);
			assert(ref_stack[stack_depth - 1] == 0);
			ref_stack[stack_depth - 2] = 0;
			stack_depth--;
			// store info after array access args are popped. in no case do we
			// want them to be enumerated.
			store_gc_site_info(stack_depth, bc - 1);
			num_call_sites++;
			break;
		case 0x2f:	// laload
		case 0x31:	// daload
			// stack:  ...,arrayref,index ==> ..., value.w1,value.w2
			// stack_depth does not change
			assert(ref_stack[stack_depth - 2] == 1);
			assert(ref_stack[stack_depth - 1] == 0);
			ref_stack[stack_depth - 2] = 0;
			// store info after array access args are popped. in no case do we
			// want them to be enumerated.
			store_gc_site_info(stack_depth, bc - 1);
			num_call_sites++;
			break;
		case 0x36:	// istore
		case 0x3a:	// astore
		case 0x38:	// fstore
			// stack:  ...,value ==> ...  (istore,fstore)
			// stack:  ...,objectref ==> ...  (astore)
			stack_depth--;
			if (bytecode != 0x38) ref_count[*bc]++;
#ifdef VAR_CLONING
			{
				unsigned var_idx = (unsigned)*bc;
				unsigned lvi_int = local_var_info[var_idx];
				Local_Var_Info &lvi = *((Local_Var_Info*)(&lvi_int));
				if (bytecode == 0x3a) {
					lvi.ref_store = 1;
				} else {
					lvi.non_ref_store = 1;
				}
				local_var_info[var_idx] = lvi_int;
			}
#endif //VAR_CLONING
			bc++;
			break;
		case 0x39:	// dstore
		case 0x37:	// lstore
			// stack:  ...,value.w1, value.w2 ==> ... 
			stack_depth -= 2;
#ifdef VAR_CLONING
			{
				unsigned var_idx = (unsigned)*bc;
				unsigned lvi_int_hi = local_var_info[var_idx];
				Local_Var_Info &lvi_hi = *((Local_Var_Info*)(&lvi_int_hi));
				lvi_hi.non_ref_store = 1;
				local_var_info[var_idx] = lvi_int_hi;
				unsigned lvi_int_lo = local_var_info[var_idx+1];
				Local_Var_Info &lvi_lo = *((Local_Var_Info*)(&lvi_int_lo));
				lvi_lo.non_ref_store = 1;
				local_var_info[var_idx+1] = lvi_int_lo;
			}
#endif //VAR_CLONING
			bc++;
			break;
		case 0x3b: case 0x3c: case 0x3d: case 0x3e:	// istore_{0,1,2,3}
		case 0x4b: case 0x4c: case 0x4d: case 0x4e:	// astore_{0,1,2,3}
			// stack:  ...,value ==> ... 
			stack_depth--;
			 ref_count[(bytecode - 0x3b) & 0x03]++;
#ifdef VAR_CLONING
			{
				unsigned var_idx = (bytecode - 0x3b) & 0x03;
				unsigned lvi_int = local_var_info[var_idx];
				Local_Var_Info &lvi = *((Local_Var_Info*)(&lvi_int));
				if (bytecode >= 0x4b) {
					lvi.ref_store = 1;
				} else {
					lvi.non_ref_store = 1;
				}
				local_var_info[var_idx] = lvi_int;
			}
#endif //VAR_CLONING
			 break;
		case 0x43: case 0x44: case 0x45: case 0x46: // fstore_{0,1,2,3}
			// stack:  ...,value ==> ... 
			stack_depth--;
#ifdef VAR_CLONING
			{
				unsigned var_idx = bytecode - 0x43;
				unsigned lvi_int = local_var_info[var_idx];
				Local_Var_Info &lvi = *((Local_Var_Info*)(&lvi_int));
				lvi.non_ref_store = 1;
				local_var_info[var_idx] = lvi_int;
			}
#endif //VAR_CLONING
			break;
		case 0x3f: case 0x40: case 0x41: case 0x42: // lstore_{0,1,2,3}
		case 0x47: case 0x48: case 0x49: case 0x4a:	// dstore_{0,1,2,3}
			// stack:  ...,value.w1, value.w2 ==> ... 
			stack_depth -= 2;
#ifdef VAR_CLONING
			{
				unsigned var_idx = (bytecode - 0x3f) & 0x03;
				unsigned lvi_int_hi = local_var_info[var_idx];
				Local_Var_Info &lvi_hi = *((Local_Var_Info*)(&lvi_int_hi));
				lvi_hi.non_ref_store = 1;
				local_var_info[var_idx] = lvi_int_hi;
				unsigned lvi_int_lo = local_var_info[var_idx+1];
				Local_Var_Info &lvi_lo = *((Local_Var_Info*)(&lvi_int_lo));
				lvi_lo.non_ref_store = 1;
				local_var_info[var_idx+1] = lvi_int_lo;
			}
#endif //VAR_CLONING
			break;
		case 0x4f:	// iastore
		case 0x51:	// fastore
		case 0x53:	// aastore
		case 0x54:	// bastore
		case 0x55:	// castore
		case 0x56:	// sastore
			// stack:  ...,arrayref,index,value ==> ...
			stack_depth -= 3;
			// store info after array access args are popped. in no case do we
			// want them to be enumerated.
			store_gc_site_info(stack_depth, bc - 1);
			num_call_sites++;
			break;
		case 0x50:	// lastore
		case 0x52:	// dastore
			// stack:  ...,arrayref,index,value.w1,value.w2 ==> ...
			stack_depth -= 4;
			// store info after array access args are popped. in no case do we
			// want them to be enumerated.
			store_gc_site_info(stack_depth, bc - 1);
			num_call_sites++;
			break;
		//
		// stack operations
		//
		case 0x57:	// pop
			// stack:  ...,word ==> ...
			stack_depth--;
			break;
		case 0x58:	// pop2
			// stack:  ...,word2,word1 ==> ...
			stack_depth -= 2;
			break;
		case 0x59:  // dup
			// push the stack top
			// dup    stack: ...,word ==> ...,word,word
			ref_stack[stack_depth] = ref_stack[stack_depth - 1];
			stack_depth++;
			break;
		case 0x5a:	// dup_x1
			// push the stack top and put two down
			// dup_x1 stack: ...,w2,w1 ==> ...,w1,w2,w1
			ref_stack[stack_depth] = ref_stack[stack_depth - 1];
			ref_stack[stack_depth - 1] = ref_stack[stack_depth - 2];
			ref_stack[stack_depth - 2] = ref_stack[stack_depth];
			stack_depth++;
			break;
		case 0x5b: 	// dup_x2
			// push the stack top and put three down
			// dup_x2 stack: ...,w3,w2,w1 ==> ...,w1,w3,w2,w1
			ref_stack[stack_depth] = ref_stack[stack_depth - 1];
			ref_stack[stack_depth - 1] = ref_stack[stack_depth - 2];
			ref_stack[stack_depth - 2] = ref_stack[stack_depth - 3];
			ref_stack[stack_depth - 3] = ref_stack[stack_depth];
			stack_depth++;
			break;
		case 0x5c:	// dup2
			// push the top 2 stack operands
			// dup2    stack: ...,w2,w1 ==> ...,w2,w1,w2,w1
			ref_stack[stack_depth] = ref_stack[stack_depth - 2];
			ref_stack[stack_depth + 1] = ref_stack[stack_depth - 1];
			stack_depth += 2;
			break;
		case 0x5d:	// dup2_x1
			// dup2_x1 stack: ...,w3,w2,w1 ==> ...,w2,w1,w3,w2,w1
			// push the top 2 stack operands
			ref_stack[stack_depth] = ref_stack[stack_depth - 2];
			ref_stack[stack_depth + 1] = ref_stack[stack_depth - 1];
			// put three down
			ref_stack[stack_depth - 1] = ref_stack[stack_depth - 3];
			ref_stack[stack_depth - 2] = ref_stack[stack_depth + 1];
			ref_stack[stack_depth - 3] = ref_stack[stack_depth];
			stack_depth += 2;
			break;
		case 0x5e:	// dup2_x2
			// dup2_x2 stack: ...,w4,w3,w2,w1 ==> ...,w2,w1,w4,w3,w2,w1
			// push the top 2 stack operands
			ref_stack[stack_depth] = ref_stack[stack_depth - 2];
			ref_stack[stack_depth + 1] = ref_stack[stack_depth - 1];
			// put four down
			ref_stack[stack_depth - 1] = ref_stack[stack_depth - 3];
			ref_stack[stack_depth - 2] = ref_stack[stack_depth - 4];
			ref_stack[stack_depth - 3] = ref_stack[stack_depth + 1];
			ref_stack[stack_depth - 4] = ref_stack[stack_depth];
			stack_depth += 2;
			break;
		case 0x5f: // swap
			// stack:  ...,word2,word1 ==> ...,word1,word2
			// stack_depth: no change
			{
				unsigned temp = ref_stack[stack_depth - 1];
				ref_stack[stack_depth - 1] = ref_stack[stack_depth - 2];
				ref_stack[stack_depth - 2] = temp;
			}
			break;
		//
		// arithmetic operations
		//
		case 0x60:	// iadd
		case 0x62:	// fadd
		case 0x64:	// isub
		case 0x66:	// fsub
		case 0x68:	// imul
		case 0x6a:	// fmul
		case 0x6c:	// idiv
		case 0x6e:	// fdiv
		case 0x70:	// irem
		case 0x72:	// frem
			// stack:  ...,value1,value2 ==> ...,result
			stack_depth--;
			break;
		case 0x61:	// ladd
		case 0x63:	// dadd
		case 0x65:	// lsub
		case 0x67:	// dsub
		case 0x6b:	// dmul
		case 0x6f:	// ddiv
		case 0x73:	// drem
			// stack:  ...,v1.w1,v1.w2,v2.w1,v2.w2 ==> ...,result.w1,result.w2
			stack_depth -= 2;
			break;
		case 0x69:	// lmul
		case 0x6d:	// ldiv
		case 0x71:	// lrem
			// stack:  ...,v1.w1,v1.w2,v2.w1,v2.w2 ==> ...,result.w1,result.w2
			store_gc_site_info(stack_depth, bc - 1);
			num_call_sites++;
			stack_depth -= 2;
			break;
			//
			// neg
			//
		case 0x74:	// ineg
		case 0x76:	// fneg
		case 0x75:	// lneg
		case 0x77:	// dneg
			// stack: ...,value ==> ...,result  (ineg,fneg)
			// stack: ...,value.w1,value.w2 ==> ...,result.w1,result.w2  (lneg,dneg)
			// stack_depth: no change
			break;
		//
		// logical operations
		//
		case 0x78: // ishl
		case 0x7a: // ishr
		case 0x7c: // iushr
		case 0x7e: // iand
		case 0x80: // ior
		case 0x82: // ixor
			// stack:  ...,value1,value2 ==> ...,result
			stack_depth--;
			break;
		case 0x79: // lshl
		case 0x7b: // lshr
		case 0x7d: // lushr
			// stack:  ...,v1.w1,v1.w2,shift ==> result.w1,result.w2
			stack_depth --;
			break;
		case 0x7f: // land
		case 0x81: // lor
		case 0x83: // lxor
			// stack:  ...,v1.w1,v1.w2,v2.w1,v2.w2 ==> ...,result.w1,result.w2
			stack_depth -= 2;
			break;
			//
			// iinc local, constant
			//
		case 0x84: // iinc
			//
			//	add var_offset[ebp],constant
			//  stack_depth:  no change
			//
			ref_count[*bc] += 2;
			bc += 2;
			break;

		case 0x85:	// i2l
		case 0x87:	// i2d
		case 0x8c:	// f2l
		case 0x8d:	// f2d
			// stack:  ...,value ==> result.w1,result.w2
			ref_stack[stack_depth++] = 0;
			break;
		case 0x86:	// i2f
		case 0x8b:	// f2i
		case 0x91:	// i2b
		case 0x92:	// i2c
		case 0x93:	// i2s
			// stack:  ...,value ==> result
			// stack_depth:  no change
            break;
		case 0x88:	// l2i
		case 0x89:	// l2f
		case 0x8e:	// d2i
		case 0x90:	// d2f
			// stack:  ...,value.w1,value.w2 ==> result
			stack_depth--;
			break;
		case 0x8a:	// l2d
		case 0x8f:	// d2l
			// stack:  ...,value.w1,value.w2 ==> result.w1,result.w2
			// stack_depth:  no change
			break;
		//
		// compare
		//
		case 0x94:	// lcmp
		case 0x97:	// dcmpl
		case 0x98:  // dcmpg
			// stack:  ...,v1.w1,v1.w2,v2.w1,v2.w2 ==> ...,result
			stack_depth -= 3;
			break;
		case 0x95:  // fcmpl
		case 0x96:	// fcmpg
			// stack:  ...,value1,value2 ==> ...,result
			stack_depth--;
			break;
		case 0x99: case 0x9a:	// if{eq,ne,lt,ge,gt,le} int comparisons against zero
		case 0x9b: case 0x9c:
		case 0x9d: case 0x9e:
			// stack: ...,value ==> ...
			stack_depth--;
			offset = (*(char*)bc << 8) + bc[1];
			bc += 2;
			num_edges++;
            old_ref_stack = (char *) _mm.alloc(maxStack * sizeof(char));
            memcpy(old_ref_stack, ref_stack, maxStack * sizeof(char));
			find_labels(curr_bc + offset,stack_depth);	// target bb
            ref_stack = old_ref_stack;
			if (!(_bytecode_info[bc - first_bc].attr.is_block_entry)) {
				// count this edge only if it will not be counted later with
				// the special case at the end of the while loop
				num_edges++;
				// fall through is a block entry (but maybe not a label)
				_bytecode_info[bc - first_bc].attr.is_block_entry = 1;
				num_blocks++;
			}
            //
            // determine if the branch is a back edge
            //
            if (offset < 0)
                mark_entries_of_back_edges(curr_bc - first_bc, offset,
                                           ref_stack, stack_depth);
			break;
		case 0x9f: case 0xa0:	// if_icmp{eq,ne,lt,ge,gt,le}
		case 0xa1: case 0xa2:	// integer conditional branch
		case 0xa3: case 0xa4:
		case 0xa5: case 0xa6:	// if_acmp{eq,ne}
			// stack:  ...,value1,value2 ==> ...
			stack_depth -= 2;
			offset = (*(char*)bc << 8) + bc[1];
			bc += 2;
			num_edges++;
            old_ref_stack = (char *) _mm.alloc(maxStack * sizeof(char));
            memcpy(old_ref_stack, ref_stack, maxStack * sizeof(char));
			find_labels(curr_bc + offset,stack_depth);	// target bb
            ref_stack = old_ref_stack;
			if (!(_bytecode_info[bc - first_bc].attr.is_block_entry)) {
				// count this edge only if it will not be counted later with
				// the special case at the end of the while loop
				num_edges++;
				// fall through is a block entry (but maybe not a label)
				_bytecode_info[bc - first_bc].attr.is_block_entry = 1;
				num_blocks++;
			}
            // determine if the branch is a back edge
            if (offset < 0)
                mark_entries_of_back_edges(curr_bc - first_bc, offset,
                                           ref_stack, stack_depth);
			break;
		case 0xa7:	// goto
			// stack:  no change
			offset = (*(char*)bc << 8) + bc[1];
			num_edges++;	// target edge
			find_labels(curr_bc + offset,stack_depth);		// target bb

            // determine if the branch is a back edge
            if (offset < 0)
                mark_entries_of_back_edges(curr_bc - first_bc, offset,
                                           ref_stack, stack_depth);
			// no need to update bc because it is the end of bb
			return; 
			break;
		case 0xa8:	// jsr
			offset = (*(char*)bc << 8) + bc[1];
			//
			// target of jsr to help with garbage collection. see comment in
			// CG_Prepass.h for complete details as to why it is needed.
			//
			{
				struct Finally_Bytecode_List *fbl =
					(struct Finally_Bytecode_List *)_mm.alloc(sizeof(struct Finally_Bytecode_List));
				fbl->next = finally_bc_list;
				finally_bc_list = fbl;
				fbl->bc = curr_bc + offset;		// store target, not offset
			}

			//
			// stack: ... ==> ...,address
			// the address of the inst I immediately following this jsr is pushed.
			// However, the callee pops the address and saves it in a local
			// variable.  At the time when the routine returns, the stack_depth
			// is the same as that prior to pushing the address.
			//
			bc += 2;
			// set to not-a-reference. the jsr places the return address on the stack,
			// although temporarily, until an astore places it into a local variable.
			// doing this really isn't vital for us, but do it just to be safe.
			ref_stack[stack_depth] = 0;
			// search target (no edge to target)
			find_labels(curr_bc + offset,stack_depth + 1);
#ifdef VAR_CLONING
			var_cloning_failed = true;
#endif //VAR_CLONING
			break;
		case 0xa9: // ret
			// bc++;
			return; // it is the end of bb
			break;
		case 0xaa: // tableswitch
			{
				// skip over padding bytes to align on 4 byte boundary
				bc = (curr_bc+1) + ((4 - (curr_bc - first_bc + 1)) & 0x03);
				// offset default label
				int default_offset = ((bc[0]<<24) + (bc[1]<<16) + (bc[2]<<8) + bc[3]);
				// low
				int low = ((bc[4]<<24) + (bc[5]<<16) + (bc[6]<<8) + bc[7]);
				// high
				int high = ((bc[8]<<24) + (bc[9]<<16) + (bc[10]<<8) + bc[11]);
				bc += 12;
				int n_entries = high - low + 1;
				
				//
				// stack: ...,index ==> ...
				//
				stack_depth--;
				//
				// visit each target
				//
				num_edges += n_entries + 1;
                old_ref_stack = (char *) _mm.alloc(maxStack * sizeof(char));
                memcpy(old_ref_stack, ref_stack, maxStack * sizeof(char));
                find_labels(curr_bc + default_offset,stack_depth);
                ref_stack = old_ref_stack;
				for (int i = 0; i < n_entries; i++) {
					//
					// allocate 4 bytes for jump table entry
					//
					_ro_data_size += 4;
					offset = ((bc[0]<<24) + (bc[1]<<16) + (bc[2]<<8) + bc[3]);
					bc += 4;
                    old_ref_stack = (char *) _mm.alloc(maxStack * sizeof(char));
                    memcpy(old_ref_stack, ref_stack, maxStack * sizeof(char));
                    find_labels(curr_bc + offset,stack_depth);
                    ref_stack = old_ref_stack;
					}
				return; // it is the end of bb
			}
			break;
		case 0xab:	// lookupswitch (key match and jump)
			{
				// skip over padding bytes to align on 4 byte boundary
				bc = (curr_bc+1) + ((4 - (curr_bc - first_bc + 1)) & 0x03);
				// offset default label
				int default_offset = ((bc[0]<<24) + (bc[1]<<16) + (bc[2]<<8) + bc[3]);
				// number of match-offset pairs in lookup table
				int npairs = ((bc[4]<<24) + (bc[5]<<16) + (bc[6]<<8) + bc[7]);
				bc += 8;
				//
				// stack: ...,key ==> ...
				//
				stack_depth--;
				num_edges += npairs + 1;
				// visit each target label
				for (int i = 0; i < npairs; i++) {
					// match key: ((bc[0]<<24) + (bc[1]<<16) + (bc[2]<<8) + bc[3]);
					// get the offset
                    int offset = ((bc[4]<<24) + (bc[5]<<16) + (bc[6]<<8) + bc[7]);
                    old_ref_stack = (char *) _mm.alloc(maxStack * sizeof(char));
                    memcpy(old_ref_stack, ref_stack, maxStack * sizeof(char));
                    find_labels(curr_bc + offset,stack_depth);
                    ref_stack = old_ref_stack;
					bc += 8;
				}
				// default label
				find_labels(curr_bc + default_offset,stack_depth);
				return; // it is the end of bb
			}
			break;
		case 0xac:	// ireturn
		case 0xae:	// freturn
		case 0xb0:	// areturn
			// stack: ...,value ==> ...             (stack_depth--;)
			// Actually, there is no need to update stack_depth
		case 0xad:	// lreturn
		case 0xaf:	// dreturn
			// stack: ...,value.w1,value.w2 ==> ... (stack_depth -= 2;)
		case 0xb1:	// return
			num_returns++;
			return; // it is the end of bb
			break;
		case 0xb2: {// getstatic
			index = (bc[0] << 8) + bc[1];
			// stack: ... ==> ...,value    or
			// stack: ... ==> ...,value.w1,value.w2
            Java_Type ft;
            Field_Handle fh = resolve_static_field(comp_handle,class_handle,index,&lexc);
            if (fh == NULL) {
                num_call_sites++;
                ft = get_java_type(const_pool_get_field_descriptor(class_handle,index));
            } else {
                ft = field_get_type(fh);
            }
			unsigned n_words = num_words_of_type(ft);
			if (is_ref_type(ft)) {
				ref_stack[stack_depth] = 1;
			} else {
				ref_stack[stack_depth] = 0;
                if (n_words==2) 
    				ref_stack[stack_depth + 1] = 0;
			}
			stack_depth += n_words;
			num_call_sites++;
            num_get_put_static++;
			bc += 2;
			}	
			break;
		case 0xb3: {// putstatic
			index = (bc[0] << 8) + bc[1];
			// stack: ...,value ==> ...   or
			// stack: ...,value.w1,value.w2 ==> ...
            Java_Type ft;
			Field_Handle fh = resolve_static_field(comp_handle,class_handle,index,&lexc);
            if (fh == NULL) {
                num_call_sites++;
                ft = get_java_type(const_pool_get_field_descriptor(class_handle,index));
            } else {
                ft = field_get_type(fh);
            }
			unsigned n_words = num_words_of_type(ft);
			stack_depth -= n_words;
			num_call_sites++;
            num_get_put_static++;
			bc += 2;
		  }	break;
		case 0xb4:	// getfield
		case 0xe3: {// getfield_quick_w
			index = (bc[0] << 8) + bc[1];
			// stack: ..., objectref ==> ...,value    or
			// stack: ..., objectref ==> ...,value.w1,value.w2
			stack_depth--;	// pop objectref
			Field_Handle fh = resolve_nonstatic_field(comp_handle,class_handle,index,&lexc);
			Java_Type ft;
            if (fh == NULL) {
                num_call_sites++;
                ft = get_java_type(const_pool_get_field_descriptor(class_handle,index));
            } else {
                ft = field_get_type(fh);
            }
			unsigned n_words = num_words_of_type(ft);
			if (is_ref_type(ft)) {
				ref_stack[stack_depth] = 1;
			} else {
				ref_stack[stack_depth] = 0;
                if (n_words==2)
    				ref_stack[stack_depth + 1] = 0;
			}
			stack_depth += n_words;
			bc += 2;
		  }	break;
		case 0xb5:	// putfield
		case 0xe4: {// putfield_quick_w
			index = (bc[0] << 8) + bc[1];
			// stack: ...,objectref,value ==> ...   or
			// stack: ...,objectref,value.w1,value.w2 ==> ...
			Field_Handle fh = resolve_nonstatic_field(comp_handle,class_handle,index,&lexc);
			Java_Type ft;
            if (fh == NULL) {
                num_call_sites++;
                ft = get_java_type(const_pool_get_field_descriptor(class_handle,index));
            } else {
                ft = field_get_type(fh);
            }
			unsigned n_words = num_words_of_type(ft);
			stack_depth -= (n_words + 1);
			bc += 2;
		  }	break;
		case 0xb6: {// invokevirtual
			index = (bc[0] << 8) + bc[1];
			num_call_sites++;
			//
			// stack: ..,objectref,[arg1,[arg2,...]] ==> ..., or
			// stack: ...,[arg1,[arg2,...]] ==> ...   (invokestatic)
			// (n_args -1) words of arguments and objectref are popped
			// from the operand stack.
			//
			Method_Handle mh = resolve_virtual_method(comp_handle,class_handle,index,&lexc);
			if (mh == NULL) {
				prepass_failed = 1;
				break;
			}
			unsigned n_words = n_words_of_method_arg_type(mh);
			stack_depth -= n_words + 1;
			// store garbage collection info after popping args
			store_gc_site_info(stack_depth, bc - 1);
			// the return value is pushed onto the stack
			Java_Type ret_type = method_get_return_type(mh);
			n_words = num_words_of_type(ret_type);
			if (is_ref_type(ret_type)) {
				ref_stack[stack_depth] = 1;
			} else if (n_words != 0) {
  				ref_stack[stack_depth] = 0;
				if (n_words==2) {
    				ref_stack[stack_depth + 1] = 0;
                }  			
			}

			stack_depth += n_words;
			bc += 2;
		  }	break;
		case 0xb7: {// invokespecial
			index = (bc[0] << 8) + bc[1];
			num_call_sites++;
			//
			// stack: ..,objectref,[arg1,[arg2,...]] ==> ..., or
			// stack: ...,[arg1,[arg2,...]] ==> ...   (invokestatic)
			// (n_args -1) words of arguments and objectref are popped
			// from the operand stack.
			//
			Method_Handle mh = resolve_special_method(comp_handle,class_handle,index,&lexc);
			if (mh == NULL) {
				prepass_failed = 1;
				break;
			}
			unsigned n_words = n_words_of_method_arg_type(mh);
			stack_depth -= n_words +1;
			// store garbage collection info after popping args
			store_gc_site_info(stack_depth, bc - 1);
			// the return value is pushed onto the stack
			Java_Type ret_type = method_get_return_type(mh);
			n_words = num_words_of_type(ret_type);
			if (is_ref_type(ret_type)) {
				ref_stack[stack_depth] = 1;
			} else if (n_words != 0) {
  				ref_stack[stack_depth] = 0;
				if (n_words==2) {
    				ref_stack[stack_depth + 1] = 0;
                }  			
			}

			stack_depth += n_words;
			bc += 2;
		  }	break;
		case 0xb8:	// invokestatic
		case 0xd9: {// invokestatic_quick
			index = (bc[0] << 8) + bc[1];
			num_call_sites++;
			// invokestatic does not have objectref
			Method_Handle mh = resolve_static_method(comp_handle,class_handle,index,&lexc);
			if (mh == NULL) {
				prepass_failed = 1;
				break;
			}
			unsigned n_words = n_words_of_method_arg_type(mh);
			stack_depth -= n_words;
			// store garbage collection info after popping args
			store_gc_site_info(stack_depth, bc - 1);
			// the return value is pushed onto the stack
			Java_Type ret_type = method_get_return_type(mh);
			n_words = num_words_of_type(ret_type);
			if (is_ref_type(ret_type)) {
				ref_stack[stack_depth] = 1;
			} else if (n_words != 0) {
  				ref_stack[stack_depth] = 0;
				if (n_words==2) {
    				ref_stack[stack_depth + 1] = 0;
                }  			
			}

			stack_depth += n_words;
			bc += 2;
		  }	break;
		case 0xb9:	// invokeinterface
		case 0xda:	// invokeinterface_quick
			// stack: ..,objectref,[arg1,[arg2,...]] ==> ...
			index = (bc[0] << 8) + bc[1];
			num_call_sites+=2;
			{
				unsigned nargs = bc[2];
				// store garbage collection info before popping args (helper call)
				store_gc_site_info(stack_depth, bc - 1);
				stack_depth -= nargs;
				// store garbage collection info after popping args (actual invoke)
				store_gc_site_info(stack_depth, bc - 1);
				Method_Handle mh = resolve_interface_method(comp_handle,class_handle,index,&lexc);
				if (mh == NULL) {
					prepass_failed = 1;
					break;
                }
				Java_Type ret_type = method_get_return_type(mh);
				unsigned n_words = num_words_of_type(ret_type);
				if (is_ref_type(ret_type)) {
					ref_stack[stack_depth] = 1;
				} else if (n_words != 0) {
  					ref_stack[stack_depth] = 0;
					if (n_words==2) {
    					ref_stack[stack_depth + 1] = 0;
					}  			
				}

				stack_depth += n_words;
			}
			// allocate 4 bytes of rw data for invokeinterface's hint
			_rw_data_size += 4;
			bc += 4;
			break;
		case 0xba:	// unused
			break;
		case 0xbb:	// new
		case 0xdd:	// new_quick
			// stack: ... ==> ...,objectref
			store_gc_site_info(stack_depth, bc - 1);
			ref_stack[stack_depth++] = 1;
			num_call_sites++;
			bc += 2;
			break;
        case 0xbc:	// newarray
			// stack: ...,count ==> ...,arrayref
			// stack_depth: no change
			store_gc_site_info(stack_depth, bc - 1);
			ref_stack[stack_depth - 1] = 1;
			bc++;
			num_call_sites++;
			break;
		case 0xbd:	// anewarray
		case 0xde:	// anewarray_quick
			// stack: ...,count ==> ...,arrayref
			// stack_depth: no change
			store_gc_site_info(stack_depth, bc - 1);
			ref_stack[stack_depth - 1] = 1;
			bc += 2;
			num_call_sites++;
			break;
		case 0xbe:	// arraylength
			// stack: ...,arrayref ==> ...,legnth
			// stack_depth: no change
			assert(ref_stack[stack_depth - 1] == 1);
			ref_stack[stack_depth - 1] = 0;
			break;
		case 0xbf:	// athrow
			assert(ref_stack[stack_depth - 1] == 1);
            num_call_sites++;
			store_gc_site_info(stack_depth, bc - 1);
			return;
			break;
		case 0xc0:	// checkcast
		case 0xe0:	// checkcast_quick
			// stack: ...,objectref ==> ...,objectref
			// stack_depth: no change
			assert(ref_stack[stack_depth - 1] == 1);
            num_call_sites++;
			store_gc_site_info(stack_depth - 1, bc - 1);
			bc += 2;
			break;
		case 0xc1:	// instanceof
		case 0xe1:	// instanceof_quick
			// stack: ...,objectref ==> ...,result
			// stack_depth: no change
			assert(ref_stack[stack_depth - 1] == 1);
			// should be conservative, but safe, to enumerate the objectref
			store_gc_site_info(stack_depth - 1, bc - 1);
			ref_stack[stack_depth - 1] = 0;
			bc += 2;
            num_call_sites++;
			break;
		case 0xc2:	// monitorenter
		case 0xc3:	// monitorexit
			// stack: ...,objecterf ==> ...
			// should be conservative, but safe, to enumerate the objectref
			store_gc_site_info(stack_depth - 1, bc - 1);
			stack_depth--;
			assert(ref_stack[stack_depth] == 1);
			num_call_sites++;
			break;
		case 0xc4:	// wide
			bytecode = *bc++;
			index = (bc[0] << 8) + bc[1];
#ifdef VAR_CLONING
			var_cloning_failed = true;
#endif //VAR_CLONING
			if (bytecode == 0x84) {
				// iinc local, constant
				// stack: no change
//				ref_count[index] += 2;
				ref_count[index] = 0; // reg alloc will ignore them
				bc += 4;
			} else {
				bc += 2;
				switch (bytecode) {
				case 0x19:	// aload
					ref_stack[stack_depth++] = 1;
					break;
				case 0x15:	// iload
				case 0x17:	// fload
					ref_stack[stack_depth++] = 0;
					break;
				case 0x16:	// lload
				case 0x18:	// dload
					ref_stack[stack_depth++] = 0;
					ref_stack[stack_depth++] = 0;
					break;
				case 0x36:	// istore
				case 0x38:	// fstore
				case 0x3a:	// astore
					stack_depth--;
					break;
				case 0x37:	// lstore
				case 0x39:	// dstore
					stack_depth -= 2;
					break;
				case 0xa9:  // ret
					return; // it is the end of bb
					break;
				} // switch
			}
			break;
		case 0xc5:	// multianewarray
		case 0xdf:	// multianewarray_quick
			// stack: ...,count1,[count2,...] ==> ...,arrayref
			store_gc_site_info(stack_depth, bc - 1);
			stack_depth -= bc[2]; // for popping count
			ref_stack[stack_depth++] = 1;  // for pushing arrayref
			num_call_sites++;
			bc += 3;
  			break;
		case 0xc6:	// ifnull
		case 0xc7:	// ifnonnull
			offset = (*(char*)bc << 8) + bc[1];
			index = (curr_bc + offset) - first_bc;
			// stack:  ...,objectref ==> ...
			stack_depth--;
			assert(ref_stack[stack_depth] == 1);
			bc += 2;
			num_edges++;	// target edge
            old_ref_stack = (char *) _mm.alloc(maxStack * sizeof(char));
            memcpy(old_ref_stack, ref_stack, maxStack * sizeof(char));
			find_labels(curr_bc + offset,stack_depth);	// target bb
            ref_stack = old_ref_stack;
			if (!(_bytecode_info[bc - first_bc].attr.is_block_entry)) {
				// count this edge only if it will not be counted later with
				// the special case at the end of the while loop
				num_edges++;
				// fall through is a block entry (but maybe not a label)
				_bytecode_info[bc - first_bc].attr.is_block_entry = 1;
				num_blocks++;
			}
            // determine if the branch is a back edge
            if (offset < 0)
                mark_entries_of_back_edges(curr_bc - first_bc, offset,
                                           ref_stack, stack_depth);
			break;
		case 0xc8:	// goto_w
			// stack: no change
			offset = ((bc[0]<<24) + (bc[1]<<16) + (bc[2]<<8) + bc[3]);
			bc += 4;
			num_edges++;	// target edge
			find_labels(curr_bc + offset,stack_depth);

            // determine if the branch is a back edge
            if (offset < 0)
                mark_entries_of_back_edges(curr_bc - first_bc, offset,
                                           ref_stack, stack_depth);
            return;  // it is the end of bb
			break;
		case 0xc9:	// jsr_w
			//
			// stack: ... ==> ...,address
			// the address of the inst I immediately following this jsr is pushed.
			// However, the callee pops the address and saves it in a local
			// variable.  At the time when the routine returns, the stack_depth
			// is the same as that prior to pushing the address.
			//
			offset = ((bc[0]<<24) + (bc[1]<<16) + (bc[2]<<8) + bc[3]);
			bc += 4;
			// set to not-a-reference. the jsr places the return address on the stack,
			// although temporarily, until an astore places it into a local variable.
			// doing this really isn't vital for us, but do it just to be safe.
			ref_stack[stack_depth] = 0;
			// visit target (no edge)
			find_labels(curr_bc + offset,stack_depth + 1);
#ifdef VAR_CLONING
			var_cloning_failed = true;
#endif //VAR_CLONING
			break;
		default:
		        printf("unknown byte code: %d\n", (int)bytecode);
			// illegal
			break;
		} // switch
        if (prepass_failed)
            return;

		bc_offset = bc - first_bc;
		byte_info = &_bytecode_info[bc_offset];
#ifdef _DEBUG
		if (stack_depth > 0x00008000) {
			printf("stack_depth negative on byte code %d at byte code offset %d\n", (int)bytecode, curr_bc - first_bc);
		}
#endif // _DEBUG
	}
	if (bc < last_bc && !prepass_failed){
		// we have exited the while loop (always guaranteed to enter
		// it if it is reached) and we are not at the end of the
		// bytecodes, so we must have run into code that has already
		// been visited. if so, then it must have been jumped to,
		// which means it is a block entry.
		assert(byte_info->attr.is_block_entry == 1);
		// this is the case where one
		// block falls thru to a block that has already been
		// discovered. must also cover case where fallthru
		// is not discovered until later when a branch target
		// is in the middle of a thought-to-be basic block,
		// thereby dividing it in two.

		num_edges++;	// all calling find_labels() does is inc num_edges
	}
}

void CG_Prepass::mark_entries_of_back_edges(unsigned bc_idx,
                                            int offset,
                                            char *ref_stack,
                                            unsigned stack_depth) {
    if (!instrumenting) return;
    unsigned target_idx = bc_idx + offset;
    assert(offset < 0);
    if (_bytecode_info[target_idx].attr.is_back_edge_entry == 0) {
        _bytecode_info[target_idx].attr.is_back_edge_entry = 1; 
        //
        // statistics code doesn't need to deal with GC support
        //
        if (!statistics && !recompilation_thread) {
	        Bit_Vector *bv = create_and_set_ref_stack_bv(_mm, ref_stack, stack_depth);
            recomp_entries = new (_mm) Recomp_Entry(bv,target_idx,recomp_entries);
        }
        num_entries_back_edge++; 
        num_call_sites++;
    }
}
