simple-rt/shaders/cube.wgsl

// Per-draw parameters supplied through the push-constant block.
struct PushConstants
{
    // Multiplied on the left of `transform` in vertex_main to produce the clip-space position.
    view_projection: mat4x4<f32>,
    // Applied to (cube vertex + instance offset) in vertex_main; the result is exported as world_pos.
    transform: mat4x4<f32>,
    // Forwarded unchanged to the fragment stage as VertexOutput.eye_pos.
    eye_position: vec3<f32>,
    // Forwarded unchanged to the fragment stage as VertexOutput.root_color.
    root_color: vec4<f32>,
    // Forwarded unchanged to the fragment stage; fragment_tree_main tests it with `!= 0`.
    root_subdivided: u32,
}

// The single push-constant variable; read only by vertex_main in this file.
var<push_constant> constants: PushConstants;

// Values handed from vertex_main to the fragment entry points.
struct VertexOutput
{
    // Clip-space position of the cube vertex.
    @builtin(position) pos: vec4<f32>,
    // Copy of the push-constant eye position (same value for every vertex).
    @location(0) eye_pos: vec3<f32>,
    // Vertex position after the model transform; interpolated across the face.
    @location(1) world_pos: vec3<f32>,
    // Integer grid offset of the instance's cube, stored as floats.
    @location(2) cube_pos: vec3<f32>,
    // Copy of the push-constant root color.
    @location(3) root_color: vec4<f32>,
    // Copy of the push-constant flag. Integer inter-stage values cannot be
    // interpolated, so WGSL requires them to be declared flat explicitly.
    @location(4) @interpolate(flat) root_subdivided: u32,
}

// Vertex entry point: emits one corner of a unit cube without any vertex buffer.
//
// `index` selects the corner: every 6 consecutive indices form one quad (two
// triangles) and there are 6 quads, so indices 0..35 cover the whole cube.
// `instance_index` places the cube on a side_length^3 integer grid.
@vertex
fn vertex_main(@builtin(vertex_index) index: u32, @builtin(instance_index) instance_index: u32) -> VertexOutput
{
    // Decompose the instance index into (x, y, z) grid coordinates.
    let side_length = u32(100);
    let offset = vec3<f32>(vec3<u32>(
        instance_index % side_length,
        (instance_index / side_length) % side_length,
        instance_index / (side_length * side_length)
    ));

    // The 8 corners of the unit cube.
    let cube_vertices = array<vec3<f32>, 8>(
        vec3<f32>(0., 0., 0.),
        vec3<f32>(0., 0., 1.),
        vec3<f32>(1., 0., 1.),
        vec3<f32>(1., 0., 0.),

        vec3<f32>(0., 1., 0.),
        vec3<f32>(0., 1., 1.),
        vec3<f32>(1., 1., 1.),
        vec3<f32>(1., 1., 0.),
    );

    // Four corner indices per quad, six quads.
    let cube_faces = array<u32, 24>(
        // Bottom face
        1, 0, 2, 3,

        // Top face
        4, 5, 7, 6,

        // Side faces
        0, 1, 4, 5,
        1, 2, 5, 6,
        2, 3, 6, 7,
        3, 0, 7, 4,
    );

    // Which quad, and which of its 6 triangle-list vertices, this invocation is.
    let quad_index = index / (3 * 2);
    let triangle_index = index % (3 * 2);
    // Maps the 6 triangle-list vertices onto the quad's 4 corners.
    let triangle_map = array<u32, 6>(
        0, 1, 2, 1, 3, 2
    );

    let vertex = cube_vertices[cube_faces[quad_index * 4 + triangle_map[triangle_index]]];

    // Transform once and reuse. `a * b * c` associates to the left in WGSL, so
    // writing `view_projection * transform * v` would multiply the two
    // matrices together for every vertex before touching the vector.
    let world = constants.transform * vec4<f32>(vertex + offset, 1.);

    var output: VertexOutput;
    output.pos = constants.view_projection * world;
    output.eye_pos = constants.eye_position;
    output.world_pos = world.xyz;
    output.cube_pos = offset;

    output.root_color = constants.root_color;
    output.root_subdivided = constants.root_subdivided;
    return output;
}

// Procedural occupancy test used by fragment_main: a voxel counts as filled
// when its integer coordinate lies strictly closer than 16 units to the
// point (16, 16, 16).
fn is_voxel(p: vec3<i32>) -> bool
{
    let center = vec3<f32>(16.);
    let radius = 16.;
    let from_center = vec3<f32>(p) - center;
    return length(from_center) < radius;
}

// Number of children along each axis of a tile.
const N: u32 = 4;
// Children per tile (N cubed = 64). Not referenced by the code visible in this file.
const N3: u32 = N * N * N;
// NOTE(review): presumably the deepest tree level the traversal supports; the
// visible entry point passes a literal 4 instead of this constant — confirm.
const MAX_DEPTH: u32 = 4;

// Topology of one tree node: one packed u32 per child (64 entries, indexed
// as x + y * N + z * N * N by the traversal functions).
struct StructureTile
{
    // The traversal code treats bit 31 as the "child is subdivided" flag and
    // masks with 0x3fffffff to obtain the child's tile index. Bit 30 is
    // neither tested nor part of that mask in this file.
    children: array<u32, 64>,
}

// Colors of one tree node's 64 children, indexed the same way as
// StructureTile.children.
struct ColorTile
{
    // The traversal code treats a child whose `w` component is non-zero as solid.
    colors: array<vec4<f32>, 64>
}

// Storage buffers in group 0. Both are indexed with the same tile index by the
// traversal code; index 0 is used as the root tile.
// Binding 0: per-tile child links / subdivision flags.
@group(0) @binding(0) var<storage> structure_tiles : array<StructureTile>;
// Binding 1: per-tile child colors.
@group(0) @binding(1) var<storage> color_tiles : array<ColorTile>;

// Marches a ray through the tile tree and returns a color.
//
// Parameters:
//   max_depth       - limits how far the descent loop may go (see its condition below).
//   root_color      - only referenced by an unreachable return in the current code.
//   root_subdivided - when false the function returns immediately.
//   eye_pos         - ray origin; multiplied by 256 (N^4) to obtain voxel coordinates.
//   ray_dir         - ray direction; its component-wise reciprocal is used, so a
//                     zero component yields a division by zero.
//
// In its current state every reachable hit/exit return goes through
// inferno_quintic, i.e. the result is a heat map (of the iteration count) rather
// than the stored voxel color. Magenta is returned when the 60-iteration budget
// is exhausted.
fn traverse2(max_depth: u32, root_color: vec4<f32>, root_subdivided: bool, eye_pos: vec3<f32>, ray_dir: vec3<f32>) -> vec4<f32>
{
    if max_depth == 0 || !root_subdivided
    {
        return inferno_quintic(0.);
        // Unreachable: shadowed by the debug return above.
        return root_color;
    }

    // Whole-tile copies of the root (tile 0); these are the "current node".
    var node_structs = structure_tiles[0];
    var node_colors = color_tiles[0];
    var node_tile_idx = u32(0);
    //var stack = array<u32, 4>(0, 0, 0, 0);
    // One saved tile copy per level, indexed by scale_exp, restored when the ray
    // leaves a finer node.
    var stack_struct = array<StructureTile, 4>(node_structs, node_structs, node_structs, node_structs);
    var stack_colors = array<ColorTile, 4>(node_colors, node_colors, node_colors, node_colors);

    // Edge length (in voxels) of one child at the current level, and the
    // matching level index into voxel_scale_lut (3 = children of the root).
    var voxel_scale = N * N * N;
    var scale_exp = 3;
    let voxel_scale_lut = array<u32, 4>(
        1,
        N,
        N * N,
        N * N * N
    );

    // Slab-intersection setup: t = dist * plane + offset gives the ray parameter
    // at which each axis reaches `plane`.
    let dist = 1. / ray_dir;
    let origin = eye_pos * 256.;
    var pos = origin; // In voxel space
    let offset = - origin * dist;
    // Selects, per axis, the cell face the ray is travelling towards
    // (the far face when the direction is positive, the near face otherwise).
    let wall_offset = select(vec3(0.), vec3(1.), ray_dir > vec3(0.));
    // Not read afterwards: the loop body declares its own voxel_pos.
    var voxel_pos = clamp(vec3<u32>(floor(pos)), vec3(0), vec3(N * N * N * N - 1));
    // Normalization for the iteration heat map (shadows the builtin `max` in this scope).
    let max = 20.;
    for(var iter = 0; iter < 60; iter++)
    {
        // Locate the child of the current node that contains `pos`:
        // 2 bits per level (N = 4), so shift by scale_exp * 2 and keep 2 bits.
        var voxel_pos = vec3<u32>(floor(pos));
        var child_pos = voxel_pos >> vec3<u32>(scale_exp * 2);
        var local_child_pos = child_pos & vec3<u32>(3);
        var child_idx = local_child_pos.x + local_child_pos.y * N + local_child_pos.z * N * N;

        // Descend while the child is flagged subdivided (bit 31) and the depth
        // limit allows it.
        // NOTE(review): with max_depth > 4 scale_exp could reach -1 here and
        // index the 4-entry arrays out of range — the visible caller passes 4.
        while (node_structs.children[child_idx] >> 31) != 0 && (u32(4 - scale_exp) <  max_depth)
        {
            //stack[scale_exp] = u32(node_tile_idx);
            // Save the node being left so it can be restored on the way up.
            stack_struct[scale_exp] = node_structs;
            stack_colors[scale_exp] = node_colors;
            scale_exp -= 1;
            voxel_scale = voxel_scale_lut[scale_exp];

            // Low 30 bits of the child entry are the tile index of the child node.
            node_tile_idx = node_structs.children[child_idx] & 0x3fffffff;
            node_structs = structure_tiles[node_tile_idx];
            node_colors = color_tiles[node_tile_idx];

            // Re-locate `pos` inside the newly entered node.
            child_pos = voxel_pos >> vec3<u32>(scale_exp * 2);
            local_child_pos = child_pos & vec3<u32>(3);
            child_idx = local_child_pos.x + local_child_pos.y * N + local_child_pos.z * N * N;
        }

        // A non-zero alpha marks a solid cell: report the hit.
        if node_colors.colors[child_idx].w != 0
        {
            return inferno_quintic(f32(iter) / max);
            // Unreachable: the stored color is currently shadowed by the heat map.
            return node_colors.colors[child_idx];
        }

        // Compute intersection
        // Exit face of the current (empty) cell along each axis.
        let global_voxel = vec3<f32>(child_pos * voxel_scale);
        let cell_max = global_voxel + vec3<f32>(voxel_scale) * wall_offset;

        let t1 = fma(dist, cell_max, offset);

        // Nearest exit plane, nudged by 0.001 so `pos` lands inside the next cell.
        let t_far = min(t1.x, min(t1.y, t1.z)) + 0.001;
        pos = origin + t_far * ray_dir;

        // XOR of old and new cell coordinates: the highest differing bit tells
        // how coarse a boundary was crossed (2 bits per level, hence >> 1).
        // NOTE(review): the +256 bias presumably keeps coordinates just outside
        // [0, 256) representable so that leaving the root flips bit 8 or higher
        // and yields diff_exp > 3 — confirm.
        let diff = vec3<u32>(pos + 256) ^ vec3<u32>(global_voxel + 256);
        let diff_exp = (firstLeadingBit((diff.x | diff.y | diff.z)) >> 1);

        // Crossed a boundary coarser than any tree level: the ray is done.
        if diff_exp > 3
        {
            return inferno_quintic(f32(iter) / max);
            // Unreachable: shadowed by the debug return above.
            discard;
        }

        // Crossed out of the current node: pop back to the level whose node
        // still contains the new position.
        if diff_exp > u32(scale_exp)
        {
            scale_exp = i32(diff_exp);

            voxel_scale = voxel_scale_lut[scale_exp];
            //node_tile_idx = stack[scale_exp];
            node_structs = stack_struct[scale_exp];
            node_colors = stack_colors[scale_exp];
        }
    }

    // Iteration budget exhausted.
    return vec4(1., 0., 1., 1.);
}

// Stack-based ray traversal of the tile tree (helper function; it is not an
// entry point and is not called by any function visible in this file).
//
// Parameters:
//   max_depth       - a solid child at this depth is reported even if it is subdivided.
//   root_color      - returned as-is when max_depth is 0 or the root is not subdivided.
//   root_subdivided - see root_color.
//   eye_pos         - ray origin; scaled by chunk_size (N^4) into voxel coordinates.
//   ray_dir         - ray direction; its component-wise reciprocal is used.
//
// Returns the hit child's color brightened by iter / 200, a grey level of
// iter / 200 when the ray leaves the root, or (100, 0, 100, 100) when the
// 300-iteration budget runs out.
fn traverse(max_depth: u32, root_color: vec4<f32>, root_subdivided: bool, eye_pos: vec3<f32>, ray_dir: vec3<f32>) -> vec4<f32>
{
    if max_depth == 0 || !root_subdivided
    {
        return root_color;
    }
    let chunk_size = N * N * N * N;
    // Child edge length (in voxels) for depth 1..4, indexed by depth - 1.
    let depth_child_size_lut = array<u32, 4>(N*N*N, N*N, N, 1);

    // Explicit stack of the ancestors of the current node: tile index, the
    // child position that was entered, and the node's origin in voxel space.
    var stack_nodes = array<i32, 4>(0, 0, 0, 0);
    var stack_child_pos = array<vec3<i32>, 4>(vec3(0), vec3(0), vec3(0), vec3(0));
    var stack_node_offset = array<vec3<i32>, 4>(vec3(0), vec3(0), vec3(0), vec3(0));
    var stack_ptr = 0;
    var current_child_size = chunk_size / N;
    var current_child_pos = vec3(0);
    var current_node_offset = vec3(0);
    var current_depth = 1;

    // Whole-tile copies of the node currently being walked (starts at tile 0).
    var current_tile_index = 0;
    var current_children_data = structure_tiles[current_tile_index];
    var current_children_colors = color_tiles[current_tile_index];

    // Intersection parameters
    // t = dist * plane + offset is the ray parameter at which an axis reaches `plane`.
    let dist = 1. / ray_dir;
    var offset = - (eye_pos * f32(chunk_size)) * dist;

    // Intersect with root
    let t0 = fma(dist, vec3<f32>(0.), offset);
    let t1 = fma(dist, vec3<f32>(chunk_size), offset);

    let tmin = min(t0, t1);
    let tmax = max(t0, t1);

    // Entry parameter clamped to 0 so an origin inside the root starts in place.
    let t_near = max(max(tmin.x, max(tmin.y, tmin.z)), 0.);
    // Not read afterwards in this scope.
    let t_far = min(tmax.x, min(tmax.y, tmax.z));

    // Per-axis direction of travel between neighbouring children.
    let step = select(vec3(-1), vec3(1), ray_dir > vec3(0.));

    // Re-base the ray at the root entry point; later t values are relative to it.
    var start_pos = (eye_pos * f32(chunk_size)) + t_near * ray_dir;
    // hit_pos and t are not read afterwards (the push branch declares its own hit_pos).
    var hit_pos = start_pos;
    offset = - start_pos * dist;
    var t = 0.;

    // Root child containing the entry point (clamped into the chunk first).
    current_child_pos = vec3(
        clamp(i32(floor(start_pos.x)), 0, i32(chunk_size) - 1),
        clamp(i32(floor(start_pos.y)), 0, i32(chunk_size) - 1),
        clamp(i32(floor(start_pos.z)), 0, i32(chunk_size) - 1),
    ) / i32(current_child_size);

    //return vec4<f32>(vec3<f32>(current_child_pos) / 4., 1.);

    for(var iter = 0; iter < 300; iter++)
    {
        // Retrieve current child information
        let child_index = current_child_pos.x + current_child_pos.y * i32(N)  + current_child_pos.z * i32(N * N);

        // Bit 31 of the packed entry flags a subdivided child.
        let child_u32 = current_children_data.children[child_index];
        let child_subdivided = (child_u32 >> 31) == 1;
        let child_color = current_children_colors.colors[child_index];

        if child_color.w != 0. // Child is solid
            && (max_depth == u32(current_depth) || !child_subdivided)
        {
            // Sample mat
            // voxel_pos is computed but not used by the return below.
            let voxel_pos = current_node_offset + current_child_pos * i32(current_child_size);
            //return vec4(child_color.xyz, 1.);
            // Stored color plus an iteration-count tint.
            return child_color+ vec4<f32>(vec3<f32>(f32(iter) / 200.), 1.);
        }

        // Advance
        // Project current child
        let global_child_pos = current_child_pos * i32(current_child_size) + current_node_offset;

        // Slab test against the current child's bounding box.
        let t0 = fma(dist, vec3<f32>(global_child_pos), offset);
        let t1 = fma(dist, vec3<f32>(global_child_pos) + vec3<f32>(current_child_size), offset);

        let tmin = min(t0, t1);
        let tmax = max(t0, t1);

        let t_near = max(max(tmin.x, max(tmin.y, tmin.z)), 0.);
        // Not read afterwards; the advance branch uses tmax directly.
        let t_far = min(tmax.x, min(tmax.y, tmax.z));

        if child_subdivided
        {
            // Push operation
            // NOTE(review): nothing here bounds stack_ptr or the depth. If a
            // non-solid child at depth 4 is flagged subdivided, stack_ptr
            // reaches 4 and next_child_size becomes 0 (used as a divisor
            // below) — confirm the tree data rules this out.

            stack_nodes[stack_ptr] = current_tile_index;
            stack_child_pos[stack_ptr] = current_child_pos;
            stack_node_offset[stack_ptr] = current_node_offset;
            stack_ptr ++;

            // Retrieve child information
            // Low 30 bits of the packed entry are the child's tile index.
            current_tile_index = i32(child_u32 & 0x3fffffff);
            current_children_data = structure_tiles[current_tile_index];
            current_children_colors = color_tiles[current_tile_index];

            // Determine child of the child
            // Point where the ray enters the child being descended into.
            let hit_pos = start_pos + ray_dir * t_near;

            let next_node_offset = current_node_offset + current_child_pos * i32(current_child_size);
            let next_child_size = current_child_size / N;
            // Clamp the entry point into the child's voxel range before
            // converting it to a grandchild coordinate.
            current_child_pos =
                (clamp(vec3<i32>(floor(hit_pos)), global_child_pos, global_child_pos + vec3(i32(current_child_size - 1))) - next_node_offset) / i32(next_child_size);

            current_child_size = next_child_size;
            current_node_offset = next_node_offset;
            current_depth ++;

        }
        else
        {
            // ADVANCE
            // Step across whichever axis (or axes, on a tie) exits the child first.
            let advance_mask = min_mask3i32(tmax);
            let next_child = current_child_pos + advance_mask * step;
            if any(next_child < vec3(0)) || any(next_child >= vec3(i32(N)))
            {
                // The step leaves the current node: work out how many levels to pop.

                // Coordinate of the node face being crossed on the exiting axis;
                // adding 256 sets bit 8 so countTrailingZeros is bounded at the
                // chunk origin.
                // NOTE(review): the three masked components are summed, which
                // presumably assumes a single exiting axis — confirm for ties.
                let aligned_child = select(vec3(0), vec3(i32(N)), vec3(step) > vec3(0));
                let masked_aligned = advance_mask * ((aligned_child * i32(current_child_size)) + current_node_offset);
                let exiting_axis = masked_aligned.x + masked_aligned.y + masked_aligned.z + 256;

                // HARDCODED FOR N = 4
                // Two bits per level: the alignment of the crossed plane gives the
                // depth of the shallowest node whose boundary it is.
                let ctz = countTrailingZeros(exiting_axis) / 2;
                let exiting_depth = 4 - ctz;

                if exiting_depth == 0 // Getting out of root
                {
                    return vec4(f32(iter) / 200.);
                    // Unreachable: shadowed by the debug return above.
                    discard;
                }

                // Restore destination depth
                current_depth = exiting_depth;
                stack_ptr = current_depth - 1;

                current_tile_index = stack_nodes[stack_ptr];
                current_children_data = structure_tiles[current_tile_index];
                current_children_colors = color_tiles[current_tile_index];

                // Resume in the ancestor at the neighbour of the child that was entered.
                current_node_offset = stack_node_offset[stack_ptr];
                current_child_pos = stack_child_pos[stack_ptr] + step * advance_mask;

                current_child_size = depth_child_size_lut[current_depth - 1];
            }else{
                // Still inside the same node: just move to the neighbouring child.
                current_child_pos = next_child;
            }
        }
     }

    // Iteration budget exhausted.
    return vec4<f32>(100., 0., 100., 100.);
}

// Multi-scale checkerboard shade for a voxel coordinate.
//
// Starts at 1 and, for cell sizes 1, 4, 16 and 64 (levels 1..4), subtracts
// 1 / (level * 2.5) whenever the sum of the coordinate's cell indices at that
// size is even.
fn sample_mat(pos: vec3<i32>) -> f32
{
    var shade = 1.;
    var cell_size = 1;
    for(var level = 1; level <= 4; level++)
    {
        // Cell index of the voxel at this scale (component-wise integer division).
        let cell = pos / cell_size;
        let is_even_cell = (cell.x + cell.y + cell.z) % 2 == 0;
        if is_even_cell
        {
            shade -= 1. / (f32(level) * 2.5);
        }
        cell_size *= 4;
    }
    return shade;
}

// Fragment entry point for the tile-tree renderer.
//
// Builds the view ray for this fragment, finds where it starts inside the
// instance's unit cube (the eye itself when the camera is inside the cube,
// otherwise the near intersection with the cube's box) and hands that
// cube-local point to traverse2, whose result is returned unchanged.
//
// The checkerboard overlay that used to be computed here was multiplied in
// with a factor forced to 1, so it and its supporting locals were dead code
// and have been removed; sample_mat still provides the same pattern.
@fragment
fn fragment_tree_main(in: VertexOutput) -> @location(0) vec4<f32>
{
    let dir = normalize(in.world_pos.xyz - in.eye_pos);

    // Ray start in cube-local coordinates.
    var hit_pos = vec3(0.);
    if all(in.eye_pos > in.cube_pos) && all(in.eye_pos < (in.cube_pos + vec3(1.)))
    {
        // Camera is inside the cube: start at the eye.
        hit_pos = in.eye_pos - in.cube_pos;
    }
    else
    {
        // Camera is outside: start where the ray enters the cube (aabb.x is the near hit).
        let aabb = intersectAABB(in.eye_pos, dir, in.cube_pos, in.cube_pos + vec3(1.));
        hit_pos = in.eye_pos + aabb.x * dir - in.cube_pos;
    }

    return traverse2(MAX_DEPTH, in.root_color, in.root_subdivided != 0, hit_pos, dir);
}

// Fragment entry point for the procedural (is_voxel) renderer.
//
// Walks the view ray through a chunk_size^3 voxel grid inside the instance's
// unit cube, one cell at a time, and shades the first filled voxel with a
// simple top-lit face normal. Fragments whose ray leaves the grid without a
// hit are discarded.
//
// Compared with a naive masked-multiply DDA this version:
//   * takes the hit parameter `t` as the smallest tMax component, so it is
//     not doubled when two axes tie;
//   * never multiplies an infinite step by a zero mask, so rays parallel to an
//     axis cannot produce NaN;
//   * bounds the walk, so a ray that fails to advance cannot hang the GPU.
@fragment
fn fragment_main(in: VertexOutput) -> @location(0) vec4<f32>
{
    let chunk_size = 32;

    let dir = normalize(in.world_pos.xyz - in.eye_pos);

    // Ray start in cube-local coordinates: the eye when it is inside the cube,
    // otherwise the near intersection with the cube's box.
    var hit_pos = vec3(0.);
    if all(in.eye_pos > in.cube_pos) && all(in.eye_pos < (in.cube_pos + vec3(1.)))
    {
        hit_pos = in.eye_pos - in.cube_pos;
    }
    else
    {
        let aabb = intersectAABB(in.eye_pos, dir, in.cube_pos, in.cube_pos + vec3(1.));
        hit_pos = in.eye_pos + aabb.x * dir - in.cube_pos;
    }

    let cube_color = vec3(1.);

    // Ray start in voxel coordinates.
    let pos = hit_pos * f32(chunk_size);
    let voxel_step = vec3<i32>(
        select(-1, 1, dir.x > 0.),
        select(-1, 1, dir.y > 0.),
        select(-1, 1, dir.z > 0.)
    );

    var voxel = vec3<i32>(
        clamp(i32(floor(pos.x)), 0, chunk_size - 1),
        clamp(i32(floor(pos.y)), 0, chunk_size - 1),
        clamp(i32(floor(pos.z)), 0, chunk_size - 1),
    );

    // Axes the ray is (numerically) parallel to never reach a cell boundary;
    // give them a huge finite crossing distance instead of 1 / 0.
    let never = vec3<f32>(1e30);
    let axis_degenerate = abs(dir) < vec3<f32>(1e-8);

    // Ray parameter needed to cross one whole cell along each axis.
    let tDelta = select(vec3<f32>(1.) / abs(dir), never, axis_degenerate);
    // Distance from the start to the first boundary along each axis.
    let dist = vec3(
        select(pos.x - f32(voxel.x), f32(voxel.x) + 1. - pos.x, voxel_step.x > 0),
        select(pos.y - f32(voxel.y), f32(voxel.y) + 1. - pos.y, voxel_step.y > 0),
        select(pos.z - f32(voxel.z), f32(voxel.z) + 1. - pos.z, voxel_step.z > 0),
    );
    // Ray parameter of the next boundary crossing along each axis.
    var tMax = select(dist * tDelta, never, axis_degenerate);
    // Ray parameter at which the current voxel was entered.
    var t = 0.;

    // Every iteration moves at least one axis by one cell, and each axis can
    // move at most chunk_size cells before leaving the grid.
    let max_steps = 3 * chunk_size + 1;
    for(var iter = 0; iter < max_steps; iter++)
    {
        if any(voxel >= vec3<i32>(chunk_size)) || any(voxel < vec3<i32>(0))
        {
            // Left the grid without hitting anything.
            discard;
        }
        // Sample
        if is_voxel(voxel)
        {
            // Compute normal: the dominant axis of (entry point - voxel center)
            // identifies the face that was hit.
            let voxel_center = vec3<f32>(voxel) + vec3(0.5);
            let surface_pos = pos + t * dir;

            let norm_dir = normalize(surface_pos - voxel_center);
            let norm_dir_max = max_mask3 (abs(norm_dir));
            let norm = sign(norm_dir * norm_dir_max);

            // Brightest on faces pointing towards +Y.
            let color = (1.2 + dot(norm, vec3<f32>(0., 1., 0.))) * 0.5;
            return vec4(vec3<f32>(color) * cube_color, 1.);
        }

        // Select which axis (or axes, on an exact tie) crosses a boundary first.
        let t_next = min(tMax.x, min(tMax.y, tMax.z));
        let advance = tMax == vec3<f32>(t_next);

        t = t_next;
        tMax += select(vec3<f32>(0.), tDelta, advance);
        voxel += select(vec3<i32>(0), voxel_step, advance);
    }

    // Only reachable if the walk was cut short by the step bound.
    return vec4<f32>(1., 0., 1., 1.);
}

// Returns a 0/1 float mask marking every component of `v` that equals the
// smallest component (several components are marked on a tie).
fn min_mask3(v: vec3<f32>) -> vec3<f32>
{
    let smallest = min(v.x, min(v.y, v.z));
    let is_smallest = v == vec3<f32>(smallest);
    return select(vec3<f32>(0.), vec3<f32>(1.), is_smallest);
}

// Integer variant of the minimum mask: 1 for every component of `v` equal to
// the smallest component, 0 elsewhere (several components are marked on a tie).
fn min_mask3i32(v: vec3<f32>) -> vec3<i32>
{
    let smallest = min(v.x, min(v.y, v.z));
    let is_smallest = v == vec3<f32>(smallest);
    return select(vec3<i32>(0), vec3<i32>(1), is_smallest);
}

// Returns a 0/1 float mask marking every component of `v` that equals the
// largest component (several components are marked on a tie).
fn max_mask3(v: vec3<f32>) -> vec3<f32>
{
    let largest = max(v.x, max(v.y, v.z));
    let is_largest = v == vec3<f32>(largest);
    return select(vec3<f32>(0.), vec3<f32>(1.), is_largest);
}

// Slab test of a ray against an axis-aligned box.
//
// Returns (tNear, tFar): the ray parameters at which the ray enters and leaves
// the box. No hit/miss decision is made here; callers compare the two values
// themselves if they need one.
fn intersectAABB(rayOrigin: vec3<f32>, rayDir: vec3<f32>, boxMin: vec3<f32>, boxMax: vec3<f32>) -> vec2<f32>
{
    // Ray parameters at the two bounding planes of each axis.
    let at_box_min = (boxMin - rayOrigin) / rayDir;
    let at_box_max = (boxMax - rayOrigin) / rayDir;

    // Sort them per axis into the nearer and the farther plane.
    let near_planes = min(at_box_min, at_box_max);
    let far_planes = max(at_box_min, at_box_max);

    // Inside the box once the last near plane is passed, until the first far plane.
    return vec2(
        max(max(near_planes.x, near_planes.y), near_planes.z),
        min(min(far_planes.x, far_planes.y), far_planes.z)
    );
}

// Maps a scalar to an opaque RGB color by evaluating one degree-5 polynomial
// per channel. The input is clamped to [0, 1] first, the channels are clamped
// to [0, 1] afterwards, and alpha is always 1.
fn inferno_quintic( xx: f32 ) -> vec4<f32>
{
    let t = saturate(xx);
    // Powers of t: (1, t, t^2, t^3) and (t^4, t^5, t^6, t^7); only t^4 and t^5
    // of the second vector are used.
    let low_powers = vec4( 1.0, t, t * t, t * t * t );
    let high_powers = low_powers * low_powers.w * t;

    let red = dot( low_powers, vec4( -0.027780558, 1.228188385, 0.278906882, 3.892783760 ) )
        + dot( high_powers.xy, vec2( -8.490712758, 4.069046086 ) );
    let green = dot( low_powers, vec4( 0.014065206, 0.015360518, 1.605395918, -4.821108251 ) )
        + dot( high_powers.xy, vec2( 8.389314011, -4.193858954 ) );
    let blue = dot( low_powers, vec4( -0.019628385, 3.122510347, -5.893222355, 2.798380308 ) )
        + dot( high_powers.xy, vec2( -3.608884658, 4.324996022 ) );

    return vec4(saturate( vec3( red, green, blue ) ), 1.);
}