Browse Source

Record more stats, improve some existing stats

- How often do_many_cycles is run and how much time it takes per call
- How many entries are currently being compiled ("pending")
- Relative frequencies of opcodes
- Reason for interpreting code (not hot, at end of page, pending or no
  block boundary)
- Number of compiled basic blocks and entry blocks
- How many pages and entries are invalidated
- How often cycle_internal, do_many_cycles and do_run are called
Fabian 6 years ago
parent
commit
df6bc4fd28
7 changed files with 113 additions and 3 deletions
  1. 35 1
      src/browser/print_stats.js
  2. 3 0
      src/config.js
  3. 16 0
      src/cpu.js
  4. 25 0
      src/native/cpu.c
  5. 9 0
      src/native/memory.c
  6. 7 0
      src/native/profiler/profiler.c
  7. 18 2
      src/native/profiler/profiler.h

+ 35 - 1
src/browser/print_stats.js

@@ -23,9 +23,21 @@ const print_stats = {
         const stat_names = [
             "COMPILE",
             "COMPILE_SUCCESS",
+            "COMPILE_CUT_OFF_AT_END_OF_PAGE",
+            "COMPILE_BASIC_BLOCK",
+            "COMPILE_ENTRY_POINT",
             "RUN_INTERPRETED",
+            "RUN_INTERPRETED_PENDING",
+            "RUN_INTERPRETED_NEAR_END_OF_PAGE",
+            "RUN_INTERPRETED_NO_BLOCK_BOUNDARY",
+            "RUN_INTERPRETED_NOT_HOT",
             "RUN_FROM_CACHE",
             "CACHE_MISMATCH",
+            "DO_RUN",
+            "DO_MANY_CYCLES",
+            "CYCLE_INTERNAL",
+            "INVALIDATE_PAGE",
+            "INVALIDATE_CACHE_ENTRY",
             "NONFAULTING_OPTIMIZATION",
             "CLEAR_TLB",
             "FULL_CLEAR_TLB",
@@ -48,6 +60,11 @@ const print_stats = {
             let stat = cpu.wm.exports["_profiler_stat_get"](i);
             stat = stat >= 100e6 ? Math.round(stat / 1e6) + "m" : stat >= 100e3 ? Math.round(stat / 1e3) + "k" : stat;
             text += stat_names[i] + "=" + stat + " ";
+
+            if(i % Math.floor(stat_names.length / 3) === Math.floor(stat_names.length / 3) - 1)
+            {
+                text += "\n";
+            }
         }
 
         text += "\n";
@@ -55,6 +72,8 @@ const print_stats = {
         text += "CACHE_UNUSED=" + cpu.wm.exports["_jit_unused_cache_stat"]() + "\n";
         text += "WASM_TABLE_FREE=" + cpu.wm.exports["_get_wasm_table_index_free_list_count"]() + "\n";
 
+        text += "do_many_cycles avg: " + do_many_cycles_total / do_many_cycles_count + "\n";
+
         return text;
     },
 
@@ -96,11 +115,13 @@ const print_stats = {
     print_wasm_basic_block_count_histogram: function(cpu)
     {
         let text = "";
+        let pending_count = 0;
         const histogram = Object.create(null);
 
         for(let i = 0; i < 0x10000; i++)
         {
             const length = cpu.wm.exports["_jit_get_entry_length"](i);
+            pending_count += cpu.wm.exports["_jit_get_entry_pending"](i);
             histogram[length] = (histogram[length] || 0) + 1;
         }
 
@@ -122,6 +143,8 @@ const print_stats = {
 
         text += "32+:" + above + "\n";
 
+        text += "Pending: " + pending_count + "\n";
+
         return text;
     },
 
@@ -146,6 +169,17 @@ const print_stats = {
         const pad_length = String(max_count).length;
 
         text += "Instruction counts (in 1000):\n";
+        let total = 0;
+        const prefixes = new Set([
+            0x26, 0x2E, 0x36, 0x3E,
+            0x64, 0x65, 0x66, 0x67,
+            0xF0, 0xF2, 0xF3,
+        ]);
+        for(let [i, count] of counts)
+        {
+            total += i < 0x100 && !prefixes.has(i) ? count : 0;
+        }
+        text += "Total: " + total + "\n";
 
         for(let [i, count] of counts)
         {
@@ -181,7 +215,7 @@ const print_stats = {
 
         for(let [opcode, count] of top_counts.slice(0, 100))
         {
-            text += opcode.toString(16) + ":" + count + " ";
+            text += opcode.toString(16) + ":" + (count / total * 100).toFixed(1) + " ";
         }
         text += "\n";
 

+ 3 - 0
src/config.js

@@ -9,6 +9,9 @@
  */
 var DEBUG = true;
 
+/** @const */
+var ENABLE_PROFILER = true;
+
 /** @const */
 var LOG_TO_FILE = false;
 

+ 16 - 0
src/cpu.js

@@ -1141,6 +1141,8 @@ CPU.prototype.load_bios = function()
 
 CPU.prototype.do_run = function()
 {
+    this.wm.exports["_profiler_stat_increment_do_run"]();
+
     // Idle time is when no instructions are being executed
     this.wm.exports["_profiler_end"](P_IDLE);
 
@@ -1170,12 +1172,20 @@ CPU.prototype.do_run = function()
     this.wm.exports["_profiler_start"](P_IDLE);
 };
 
+let do_many_cycles_count = 0;
+let do_many_cycles_total = 0;
+
 CPU.prototype.do_many_cycles = function()
 {
     // Capture the total time we were executing instructions
     this.wm.exports["_profiler_start"](P_DO_MANY_CYCLES);
     this.coverage_logger.log_start();
 
+    if(ENABLE_PROFILER)
+    {
+        var start_time = v86.microtick();
+    }
+
     try {
         this.do_many_cycles_unsafe();
     }
@@ -1184,6 +1194,12 @@ CPU.prototype.do_many_cycles = function()
         this.exception_cleanup(e);
     }
 
+    if(ENABLE_PROFILER)
+    {
+        do_many_cycles_total += v86.microtick() - start_time;
+        do_many_cycles_count++;
+    }
+
     this.coverage_logger.log_end();
     this.wm.exports["_profiler_end"](P_DO_MANY_CYCLES);
     this.wm.exports["_profiler_end"](P_GEN_INSTR);

+ 25 - 0
src/native/cpu.c

@@ -1176,6 +1176,7 @@ static void jit_find_basic_blocks()
                 current_block->next_block_addr = 0;
                 current_block->end_addr = *instruction_pointer;
                 current_block->condition_index = -1;
+                profiler_stat_increment(S_COMPILE_CUT_OFF_AT_END_OF_PAGE);
                 break;
             }
 
@@ -1518,6 +1519,8 @@ static void jit_generate(uint32_t phys_addr)
     {
         struct basic_block* block = &basic_blocks.blocks[i];
 
+        profiler_stat_increment(S_COMPILE_BASIC_BLOCK);
+
         if(block->is_entry_block && block->addr != block->end_addr)
         {
             uint32_t phys_addr = translate_address_read(block->addr);
@@ -1537,6 +1540,7 @@ static void jit_generate(uint32_t phys_addr)
 #endif
 
             entry_point_count++;
+            profiler_stat_increment(S_COMPILE_ENTRY_POINT);
         }
     }
 
@@ -1568,6 +1572,7 @@ void jit_force_generate_unsafe(uint32_t phys_addr)
 
 void cycle_internal()
 {
+    profiler_stat_increment(S_CYCLE_INTERNAL);
 #if ENABLE_JIT
 
     *previous_ip = *instruction_pointer;
@@ -1624,6 +1629,24 @@ void cycle_internal()
         }
         else
         {
+            if(entry)
+            {
+                assert(entry->pending);
+                profiler_stat_increment(S_RUN_INTERPRETED_PENDING);
+            }
+            else if(is_near_end_of_page(phys_addr))
+            {
+                profiler_stat_increment(S_RUN_INTERPRETED_NEAR_END_OF_PAGE);
+            }
+            else if(!did_block_boundary)
+            {
+                profiler_stat_increment(S_RUN_INTERPRETED_NO_BLOCK_BOUNDARY);
+            }
+            else
+            {
+                profiler_stat_increment(S_RUN_INTERPRETED_NOT_HOT);
+            }
+
             jit_run_interpreted(phys_addr);
         }
     }
@@ -1681,6 +1704,8 @@ jit_instr_flags segment_prefix_op_jit(int32_t seg)
 
 void do_many_cycles_unsafe()
 {
+    profiler_stat_increment(S_DO_MANY_CYCLES);
+
 #if 0
     for(int32_t k = 0; k < LOOP_COUNTER; k++)
 #else

+ 9 - 0
src/native/memory.c

@@ -8,6 +8,7 @@
 #include "js_imports.h"
 #include "log.h"
 #include "memory.h"
+#include "profiler/profiler.h"
 
 bool in_mapped_range(uint32_t addr)
 {
@@ -21,9 +22,11 @@ void jit_dirty_index(uint32_t index)
     if(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY)
     {
         page_first_jit_cache_entry[index] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
+        profiler_stat_increment(S_INVALIDATE_PAGE);
 
         do
         {
+            profiler_stat_increment(S_INVALIDATE_CACHE_ENTRY);
             struct code_cache* entry = &jit_cache_arr[cache_array_index];
 
             assert(same_page(index << DIRTY_ARR_SHIFT, entry->start_addr));
@@ -150,6 +153,12 @@ int32_t jit_get_entry_address(int32_t i)
     return jit_cache_arr[i].start_addr;
 }
 
+int32_t jit_get_entry_pending(int32_t i)
+{
+    assert(i >= 0 && i < JIT_CACHE_ARRAY_SIZE);
+    return jit_cache_arr[i].pending;
+}
+
 int32_t read8(uint32_t addr)
 {
     if(USE_A20 && *a20_enabled) addr &= A20_MASK;

+ 7 - 0
src/native/profiler/profiler.c

@@ -101,6 +101,12 @@ void profiler_stat_increment(enum stat_name stat)
     profiler_stat_arr[stat].count++;
 }
 
+// to be called from JS
+void profiler_stat_increment_do_run()
+{
+    profiler_stat_increment(S_DO_RUN);
+}
+
 int32_t profiler_stat_get(enum stat_name stat)
 {
     return profiler_stat_arr[stat].count;
@@ -116,6 +122,7 @@ void profiler_print(void) {}
 int32_t profiler_get_time(enum profile_name name) { UNUSED(name); return 0; }
 int32_t profiler_get_total(void) { return 0; }
 void profiler_stat_increment(enum stat_name stat) { UNUSED(stat); }
+void profiler_stat_increment_do_run() {}
 int32_t profiler_stat_get(enum stat_name stat) { UNUSED(stat); return 0; }
 
 

+ 18 - 2
src/native/profiler/profiler.h

@@ -22,14 +22,29 @@ enum profile_name {
 };
 
 
-#define PROFILER_STAT_COUNT 13
-
 enum stat_name {
     S_COMPILE,
     S_COMPILE_SUCCESS,
+    S_COMPILE_CUT_OFF_AT_END_OF_PAGE,
+    S_COMPILE_BASIC_BLOCK,
+    S_COMPILE_ENTRY_POINT,
+
     S_RUN_INTERPRETED,
+    S_RUN_INTERPRETED_PENDING,
+    S_RUN_INTERPRETED_NEAR_END_OF_PAGE,
+    S_RUN_INTERPRETED_NO_BLOCK_BOUNDARY,
+    S_RUN_INTERPRETED_NOT_HOT,
+
     S_RUN_FROM_CACHE,
     S_CACHE_MISMATCH,
+
+    S_DO_RUN,
+    S_DO_MANY_CYCLES,
+    S_CYCLE_INTERNAL,
+
+    S_INVALIDATE_PAGE,
+    S_INVALIDATE_CACHE_ENTRY,
+
     S_NONFAULTING_OPTIMIZATION,
 
     S_CLEAR_TLB,
@@ -37,6 +52,7 @@ enum stat_name {
     S_TLB_FULL,
     S_TLB_GLOBAL_FULL,
 };
+#define PROFILER_STAT_COUNT (S_TLB_GLOBAL_FULL - S_COMPILE + 1)
 
 struct profiler_stat {
     int32_t count;