// r3d.c
  1. #include <stdint.h>
  2. #include <malloc.h>
  3. #include <stdlib.h>
  4. #include <stdio.h>
  5. #include "raspi.h"
  6. #include "vc4.h"
  7. #include "vc4_control_list.h"
  8. #include "r3d.h"
  9. #define aligned_alloc memalign
  10. #define R3D_ALIGN 256
  11. static uint8_t* r3d_bin_address;
  12. static uint8_t* r3d_bin_base;
  13. static uint8_t* r3d_bin_ctl_lists;
  14. static uint8_t* r3d_render_ctl_lists;
  15. static uint8_t* r3d_vertex_lists;
  16. static uint8_t* r3d_shader_states;
  17. static uint8_t* r3d_overspill_mem;
  18. static uint8_t* r3d_gouraud_shader;
  19. static uint8_t cl_idx;
  20. static int r3d_tile_rows;
  21. static int r3d_tile_cols;
  22. static uint32_t* FB;
  23. static uint8_t* control_list_bin;
  24. static uint8_t* control_list_bin_end;
  25. static uint8_t* control_list_render;
  26. static uint8_t* control_list_render_end;
  27. static uint8_t* next_shader_state;
  28. static nv_vertex_t* next_triangles;
  29. static int next_num_triangles;
  30. // linux driver: https://github.com/anholt/linux/blob/vc4-kms-v3d/drivers/gpu/drm/vc4/vc4_gem.c
  31. // mesa: http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/vc4/vc4_draw.c
  32. void r3d_write_binning_list(uint8_t* cl) {
  33. // get next binning control list ---------------------------------------------------------------
  34. control_list_bin = cl;
  35. cl += vc4_tile_binning_mode_conf(cl, (uint32_t)r3d_bin_address, R3D_BIN_SIZE, (uint32_t)r3d_bin_base,
  36. r3d_tile_cols, r3d_tile_rows, vc4_auto_initialise_tile_state_data_array);
  37. cl += vc4_start_tile_binning(cl);
  38. cl += vc4_clip_window(cl, 0,0,1920,1080);
  39. cl += vc4_configuration_bits(cl, vc4_enable_forward_facing_primitive + vc4_enable_reverse_facing_primitive, 0); // vc4_early_z_updates_enable
  40. cl += vc4_viewport_offset(cl, 0,0);
  41. //cl += vc4_coordinate_array_primitives(cl, vc4_primitives_type_triangles, 3*NUM_TRIS, (uint32_t)triangles);
  42. // confirmed that this order is correct
  43. cl += vc4_nv_shader_state(cl, (uint32_t)next_shader_state);
  44. cl += vc4_vertex_array_primitives(cl, vc4_mode_triangles, 3 * next_num_triangles, 0);
  45. cl += vc4_flush_all_state(cl);
  46. control_list_bin_end = cl;
  47. }
  48. void r3d_write_render_list(uint8_t* cl) {
  49. control_list_render = cl;
  50. uint32_t clear_color = 0xffffffff;
  51. cl += vc4_clear_colors(cl, clear_color, clear_color, 0, 0, 0);
  52. cl += vc4_tile_rendering_mode_conf(cl, (uint32_t)FB, 1920, 1080, vc4_frame_buffer_color_format_rgba8888);
  53. // these are optional instructions for hardware thread synchronization
  54. //cl += vc4_wait_on_semaphore(cl);
  55. // unclear if necessary
  56. //cl += vc4_nv_shader_state(cl, (uint32_t)next_shader_state);
  57. //cl += vc4_vertex_array_primitives(cl, vc4_mode_triangles, 3 * next_num_triangles, 0);
  58. cl += vc4_tile_coordinates(cl, 0,0);
  59. cl += vc4_store_tile_buffer_general(cl, 0, 0, 0); // disable double buffer swap 1<<4 in second zero arg
  60. //cl += vc4_reserved2(cl); // from bcm ghw_composer_impl.cpp createRendList(), supposed to be a MARK
  61. // assumes 1920x1080
  62. r3d_tile_rows = 16;
  63. r3d_tile_cols = 30;
  64. for (int y = 0; y < r3d_tile_rows; y++) {
  65. for (int x = 0; x < r3d_tile_cols; x++) {
  66. cl += vc4_tile_coordinates(cl, x,y);
  67. cl += vc4_branch_to_sublist(cl, (uint32_t)r3d_bin_address + ((y * r3d_tile_cols + x) * 32)); // sublists are 32 bytes long
  68. if (y!=r3d_tile_rows-1 || x!=r3d_tile_cols-1) {
  69. cl += vc4_store_multi_sample(cl);
  70. } else {
  71. //cl += vc4_store_multi_sample(cl);
  72. }
  73. }
  74. }
  75. cl += vc4_store_multi_sample_end(cl); // end of frame
  76. control_list_render_end = cl;
  77. }
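  // NOTE: clear colors are RGBA8888. This refers to the clear_color argument of r3d_render_frame(); r3d_init() below takes no color.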
  79. void r3d_init(uint32_t* fb) {
  80. FB = fb;
  81. // RENDER CONTROL LIST -------------------------------------------------
  82. r3d_bin_address = (uint8_t*)aligned_alloc(R3D_ALIGN, R3D_BIN_SIZE);
  83. r3d_bin_base = (uint8_t*)aligned_alloc(R3D_ALIGN, R3D_BIN_SIZE);
  84. //r3d_render_ctl_list = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * 10); // FIXME: enough?
  85. //uint8_t* cl = r3d_render_ctl_list;
  86. printf("-- bin address at %p\r\n",r3d_bin_address);
  87. printf("-- bin base at %p\r\n",r3d_bin_base);
  88. //printf("-- render control list at %p\r\n",r3d_render_ctl_list);
  89. r3d_overspill_mem = aligned_alloc(R3D_ALIGN,R3D_OVERSPILL_SIZE);
  90. r3d_bin_ctl_lists = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * NUM_CTL_LISTS);
  91. r3d_render_ctl_lists = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * NUM_CTL_LISTS);
  92. r3d_vertex_lists = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * NUM_CTL_LISTS);
  93. r3d_shader_states = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * NUM_CTL_LISTS);
  94. r3d_gouraud_shader = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * 1);
  95. vc4_gouraud_shader(r3d_gouraud_shader);
  96. r3d_write_render_list(r3d_render_ctl_lists);
  97. cl_idx = -1;
  98. }
  99. nv_vertex_t* r3d_init_frame() {
  100. // reset and stop binning and render threads
  101. //*((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_CT0CS)) = (1<<15) | (1<<5);
  102. //*((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_CT1CS)) = (1<<15) | (1<<5);
  103. // advance lists index (cycle through lists)
  104. cl_idx++;
  105. if (cl_idx>(NUM_CTL_LISTS-1)) cl_idx = 0;
  106. //printf("~~ cl_idx: %d\r\n",cl_idx);
  107. next_shader_state = r3d_shader_states+cl_idx*CTL_BLOCK_SIZE;
  108. next_triangles = (nv_vertex_t*)(r3d_vertex_lists+cl_idx*CTL_BLOCK_SIZE);
  109. return next_triangles;
  110. }
  111. void r3d_triangles(int num_triangles, nv_vertex_t* triangles) {
  112. next_triangles = triangles;
  113. next_num_triangles = num_triangles;
  114. //vc4_shader_state_record(flat_shader_state, flat_shader);
  115. vc4_nv_shader_state_record(next_shader_state, r3d_gouraud_shader, 3, 6*4, (uint8_t*)next_triangles);
  116. }
  117. extern void khrn_hw_full_memory_barrier(void);
  118. void r3d_render_frame(uint32_t clear_color) {
  119. arm_dmb();
  120. arm_dsb();
  121. arm_isb();
  122. r3d_write_binning_list(r3d_bin_ctl_lists+cl_idx*CTL_BLOCK_SIZE);
  123. arm_invalidate_data_caches();
  124. arm_dmb();
  125. //khrn_hw_full_memory_barrier();
  126. // reset binning and frame flush counters
  127. *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_BFC)) = 1;
  128. *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_RFC)) = 1;
  129. // submit binning list
  130. *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_CT0CA)) = (uint32_t)control_list_bin;
  131. *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_CT0EA)) = (uint32_t)control_list_bin_end;
  132. //khrn_hw_full_memory_barrier();
  133. arm_dmb();
  134. //printf("~~ submitted binning list %p-%p\r\n",control_list_bin,control_list_bin_end);
  135. //cl = r3d_render_ctl_lists+cl_idx*CTL_BLOCK_SIZE;
  136. // submit render
  137. *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_CT1CA)) = (uint32_t)control_list_render;
  138. *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_CT1EA)) = (uint32_t)control_list_render_end;
  139. arm_dmb();
  140. //printf("~~ submitted rendering list %p-%p\r\n",control_list_render,control_list_render_end);
  141. uint32_t bfc = 0;
  142. /*do {
  143. //printf("bfc loop\r\n");
  144. arm_dmb();
  145. r3d_debug_gpu();
  146. bfc = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_BFC));
  147. } while (bfc==0);
  148. */
  149. uint32_t rfc = 0;
  150. /*do {
  151. //printf("rfc loop\r\n");
  152. arm_dmb();
  153. r3d_debug_gpu();
  154. rfc = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_RFC));
  155. } while (rfc==0);*/
  156. uint32_t ct1cs = 0x20;
  157. int timeout = 0;
  158. do {
  159. timeout++;
  160. if (timeout>1000*1000) break;
  161. ct1cs = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_CT1CS));
  162. } while (ct1cs & 0x20);
  163. //printf("~~ r3d timeout: %d\r\n",timeout);
  164. }
  165. void r3d_debug_gpu() {
  166. uint32_t dbge, fdbgo, fdbgr, fdbgs, bfc, errstat, pcs, status0, status1, rfc;
  167. arm_dmb();
  168. arm_dsb();
  169. dbge = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_DBGE));
  170. fdbgo = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_FDBGO));
  171. fdbgr = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_FDBGR));
  172. fdbgs = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_FDBGS));
  173. bfc = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_BFC));
  174. rfc = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_RFC));
  175. errstat = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_ERRSTAT));
  176. pcs = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_PCS));
  177. status0 = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_CT0CS));
  178. status1 = *((volatile uint32_t*)(PERIPHERAL_BASE + V3D_BASE + V3D_CT1CS));
  179. //printf("-- BFC: 0x%x RFC: 0x%x PCS: 0x%x ERRST: 0x%x DBGE: 0x%x DBGO: 0x%x DBGR: 0x%x DBGS: 0x%x ST0: 0x%x ST1: 0x%x\r\n",bfc,rfc,pcs,errstat,dbge,fdbgo,fdbgr,fdbgs,status0,status1);
  180. }