WebGPU & Neighbor Search ( 近傍探索 ) - WebGPU プログラミング

WebGPU で Compute Shader を用いて、Neighbor Search を行ってみました。
f:id:onagat12:20201211004355g:plain
2d 画面を 9個のセルに区切り、注目の粒子を赤色、同じセル内の粒子を黄色、
その隣のセル内にある粒子を緑色、それ以外の粒子を灰色で示しています。
隣接セルは、注目粒子に近いセルとします。
Neighbor Search の方法は、以下のサイトを参考にしています。
１ SPH法の実装(GPU実装含む) - PukiWiki for PBCG Lab
３ [#Unity] ComputeShaderで近傍探索の高速化 - Qiita
また、Neighbor Search で用いる Bitonic Sort は、以下のサイトを参考に
しています。
JavaScriptでバイトニックソート - Qiita
プログラム
（GPUバッファで計算した値を確認するため、バッファ読み取り用の
処理も入れてあります。コメント行にしています。）
NeighborSearch-WebGPU.html
<!DOCTYPE html>
<html>
<head>
    <title>GPU FindNeighbors-2d Test</title>
    <meta charset="utf-8">
    <script src="utils.js"></script>
</head>
<body>
<canvas id="webgpu-canvas"></canvas>
<script>
utils.checkSupport();

(async () => {
    const [adapter, glslang] = await Promise.all([
        navigator.gpu.requestAdapter(),
        import("https://unpkg.com/@webgpu/glslang@0.0.15/dist/web-devel/glslang.js").then(m => m.default())
    ]);
  
    const device = await adapter.requestDevice();

    const canvas = document.getElementById("webgpu-canvas");
    canvas.width = 400;
    canvas.height = 400;

    const context = canvas.getContext("gpupresent");

    const swapChainFormat = "bgra8unorm"; 
    const swapChain = context.configureSwapChain({
        device,
        format: swapChainFormat
    });

    const powerN = 4;
    const dataNum = 2 ** powerN;
    const PARTICLE_NUM = dataNum;
    const PARTICLE_SIZE = 3;
    const POINT_SIZE = 0.04; // 0.02

    const viewRadius = 0.2;
    const bucketSize = 2.0 * viewRadius;
    const bucketNum = Math.ceil(1.0 / bucketSize);
    const bucketIndexNum = bucketNum * bucketNum; // 2d
    const particleIndex = 0;

    const positionData = new Float32Array(PARTICLE_NUM * PARTICLE_SIZE);
    const velocityData = new Float32Array(PARTICLE_NUM * PARTICLE_SIZE);

    var buckets = []; // for test

    for (let i = 0 ; i < positionData.length / PARTICLE_SIZE ; i++){
        positionData[i * PARTICLE_SIZE + 0] = Math.random();
        positionData[i * PARTICLE_SIZE + 1] = Math.random();
        positionData[i * PARTICLE_SIZE + 2] = 0;

        velocityData[i * PARTICLE_SIZE + 0] = (Math.random() * 2.0 - 1.0) * 0.002;
        velocityData[i * PARTICLE_SIZE + 1] = (Math.random() * 2.0 - 1.0) * 0.002;
        velocityData[i * PARTICLE_SIZE + 2] = 0.0;

        // for test
        var bucketX = Math.floor(positionData[i * PARTICLE_SIZE + 0] / bucketSize);
        var bucketY = Math.floor(positionData[i * PARTICLE_SIZE + 1] / bucketSize);
        var bucketIndex = bucketX + bucketY * bucketNum;
        buckets.push([bucketIndex, i]);
    }
    //console.log("position: ", positionData);

    const positionBuffer = device.createBuffer({
        size: positionData.byteLength,
        usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
    });
    device.defaultQueue.writeBuffer(positionBuffer, 0, positionData);

    const velocityBuffer = device.createBuffer({
        size: velocityData.byteLength,
        usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
    });
    device.defaultQueue.writeBuffer(velocityBuffer, 0, velocityData);

    //console.log("buckets: ", buckets);
    // cpu bitonicsort( for check)
    let n = powerN;
      for (let p = 0; p < n; p++) {
        for(let q = 0; q <=p; q++) {

            const d = 1 << (p - q);

            for (let i = 0; i < buckets.length; i++) {
                const up = ((i >> p) & 2) === 0;
                if ((i & d) == 0 && (buckets[i][0] > buckets[i | d][0]) === up) {
                    const tmp = buckets[i];
                    buckets[i] = buckets[i | d];
                    buckets[i | d] = tmp;
                }
            }
        }
      }
      console.log("bukets sorted: ", buckets);

    const bucketsArray = new Uint32Array(PARTICLE_NUM * 2);
    const bucketsBuffer = device.createBuffer({
        size: bucketsArray.byteLength,
        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
    });

    // Work Data for Sort
    const workBufferSize = bucketsArray.byteLength;
    const workBuffer = device.createBuffer({
        size: workBufferSize,
        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
    });

    // Result Data ( for buckets )
    //const resultBufferSize = bucketsArray.byteLength;
    //const resultBuffer = device.createBuffer({
    //    size: resultBufferSize,
    //    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
    //    // 注意 resultBuffer: COPY_SRC
    //});
    
    // bucketRef Data and Buffer
    const bucketRefData = new Uint32Array(bucketIndexNum * 2);
  
    for (let i = 0 ; i < bucketIndexNum ; i++){
        bucketRefData[i * 2 + 0] = 65535;
        bucketRefData[i * 2 + 1] = 65535;
    }
    console.log("bucketRef: ", bucketRefData);

    const bucketRefBuffer = device.createBuffer({
        size: bucketRefData.byteLength,
        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
    });
    device.defaultQueue.writeBuffer(bucketRefBuffer, 0, bucketRefData);

    // neighbors Data and Buffer
    const neighbors = new Uint32Array(PARTICLE_NUM);
    //for (let i = 0 ; i < PARTICLE_NUM ; i++){
    //    neighbors[i] = 65535;
    //}
    //console.log("neighbors: ", neighbors);

    const neighborsBuffer = device.createBuffer({
        size: neighbors.byteLength,
        usage: GPUBufferUsage.VERTEX | GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
    });
    //device.defaultQueue.writeBuffer(neighborsBuffer, 0, neighbors);

    // Result Data
    //const resultBufferSize = bucketRefData.byteLength; // for bucketRef
    //const resultBufferSize = neighbors.byteLength; // for findNeighbors
    //const resultBuffer = device.createBuffer({
    //    size: resultBufferSize,
    //    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
    //    // 注意 resultBuffer: COPY_SRC
    //});

    // pq uniform Buffer
    var uniformBuffer = device.createBuffer({
        size: 2 * 4,
        usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
    });

    // particleIndex uniform Buffer
    //const particleUniformBuffer = device.createBuffer({
    //    size: 4,
    //    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
    //});

    // Compute shader code (GLSL)
    const computeShader_initialize_buckets = `
    #version 450

    // constants
    #define PARTICLE_NUM ${PARTICLE_NUM}
    #define PARTICLE_SIZE ${PARTICLE_SIZE}
    #define BUCKET_NUM ${bucketNum}
    #define BUCKET_SIZE ${bucketSize}
    #define BUCKETIDX_NUM ${bucketIndexNum}

    layout(set = 0, binding = 0) buffer Particle {
        float particle[ PARTICLE_NUM * PARTICLE_SIZE ];
    } particle;

    layout(set = 0, binding = 1) buffer Velocity {
        float velocity[ PARTICLE_NUM * PARTICLE_SIZE ];
    } velocity;
    
    layout(set = 0, binding = 2) buffer BucketsData {
        uint buckets[ PARTICLE_NUM * 2 ];
    };

    layout(set = 0, binding = 3) buffer WorkData {
        uint numbers[ PARTICLE_NUM * 2 ];
    } workData;

    layout(set = 0, binding = 4) buffer BucketRef {
        uint bucketRef[ BUCKETIDX_NUM * 2 ];
    };

    layout(set = 0, binding = 5) buffer Neighbors {
        uint neighbors[ PARTICLE_NUM ];
    };

    //layout(set = 0, binding = 6) buffer Results {
    //    //uint numbers[ PARTICLE_NUM * 2 ];
    //    //uint numbers[ BUCKETIDX_NUM * 2 ]; // for bucketRef
    //    uint numbers[ PARTICLE_NUM ]; // for findNeeighbors
    //} resultData;

    layout(set = 0, binding = 7) uniform StepUniform {
        vec2 pq;
    };

    //layout(set = 0, binding = 8) uniform ParticleIndex {
    //    uint particleIndex;
    //};
    
    void main()
    {
        uint index = gl_GlobalInvocationID.x;
        if ( index >= PARTICLE_NUM ) { return; }

        // buckets index
        vec3 position = vec3(
            particle.particle[ index * PARTICLE_SIZE + 0 ],
            particle.particle[ index * PARTICLE_SIZE + 1 ],
            particle.particle[ index * PARTICLE_SIZE + 2 ]
        );

        uint bucketX = uint( floor(position.x / BUCKET_SIZE) );
        uint bucketY = uint( floor(position.y / BUCKET_SIZE) );
        uint bucketIndex = bucketX + bucketY * BUCKET_NUM;
        buckets[index * 2 + 0 ] = bucketIndex;
        buckets[index * 2 + 1 ] = index;
    }
    `;

    const computeShaderSort = `
    #version 450

    #define N ${powerN}
    #define PARTICLE_NUM ${PARTICLE_NUM}
    #define PARTICLE_SIZE ${PARTICLE_SIZE}
    #define BUCKETIDX_NUM ${bucketIndexNum}

    layout(set = 0, binding = 0) buffer Particle {
        float particle[ PARTICLE_NUM * PARTICLE_SIZE ];
    } particle;

    layout(set = 0, binding = 1) buffer Velocity {
        float velocity[ PARTICLE_NUM * PARTICLE_SIZE ];
    } velocity;

    layout(set = 0, binding = 2) buffer BucketsData {
        uint buckets[ PARTICLE_NUM * 2 ];
    };

    layout(set = 0, binding = 3) buffer WorkData {
        uint numbers[ PARTICLE_NUM * 2 ];
    } workData;

    layout(set = 0, binding = 4) buffer BucketRef {
        uint bucketRef[ BUCKETIDX_NUM * 2 ];
    };

    layout(set = 0, binding = 5) buffer Neighbors {
        uint neighbors[ PARTICLE_NUM ];
    };

    //layout(set = 0, binding = 6) buffer ResultData {
    //    //uint numbers[ PARTICLE_NUM * 2 ];
    //    //uint numbers[ BUCKETIDX_NUM * 2 ]; // for bucketRef
    //    uint numbers[ PARTICLE_NUM ]; // for findNeeighbors
    //} resultData;

    layout(set = 0, binding = 7) uniform StepUniform {
        vec2 pq;
    };

    //layout(set = 0, binding = 8) uniform ParticleIndex {
    //    uint particleIndex;
    //};

    const uint n = N;

    void main() {
        uint index = gl_GlobalInvocationID.x;

        uint p = uint(pq.x);
        uint q = uint(pq.y);

        uint d = 1u << (p - q);

        bool up = ((index >> p) & 2u) == 0u;
        if ((index & d) == 0 && (buckets[index * 2 + 0] > buckets[(index + d) * 2 + 0]) == up) {
            workData.numbers[index * 2 + 0] = buckets[(index + d) * 2 + 0];
            workData.numbers[index * 2 + 1] = buckets[(index + d) * 2 + 1];
        } else if ((index & d) == d && (buckets[(index - d) * 2 + 0] > buckets[index * 2 + 0]) == up) {
            workData.numbers[index * 2 + 0] = buckets[(index - d) * 2 + 0];
            workData.numbers[index * 2 + 1] = buckets[(index - d) * 2 + 1];
        } else {
            workData.numbers[index * 2 + 0] = buckets[index * 2 + 0];
            workData.numbers[index * 2 + 1] = buckets[index * 2 + 1];
        }
    }
    `;

    const computeShaderWork = `
    #version 450

    #define PARTICLE_NUM ${PARTICLE_NUM}
    #define PARTICLE_SIZE ${PARTICLE_SIZE}
    #define BUCKETIDX_NUM ${bucketIndexNum}

    layout(set = 0, binding = 0) buffer Particle {
        float particle[ PARTICLE_NUM * PARTICLE_SIZE ];
    } particle;

    layout(set = 0, binding = 1) buffer Velocity {
        float velocity[ PARTICLE_NUM * PARTICLE_SIZE ];
    } velocity;

    layout(set = 0, binding = 2) buffer BucketsData {
        uint buckets[ PARTICLE_NUM * 2 ];
    };

    layout(set = 0, binding = 3) buffer WorkData {
        uint numbers[ PARTICLE_NUM * 2 ];
    } workData;

    layout(set = 0, binding = 4) buffer BucketRef {
        uint bucketRef[ BUCKETIDX_NUM * 2 ];
    };

    layout(set = 0, binding = 5) buffer Neighbors {
        uint neighbors[ PARTICLE_NUM ];
    };

    //layout(set = 0, binding = 6) buffer ResultData {
    //    //uint numbers[ PARTICLE_NUM * 2 ];
    //    //uint numbers[ BUCKETIDX_NUM * 2 ]; // for bucketRef
    //    uint numbers[ PARTICLE_NUM ]; // for findNeeighbors
    //} resultData;

    layout(set = 0, binding = 7) uniform StepUniform {
        vec2 pq;
    };

    //layout(set = 0, binding = 8) uniform ParticleIndex {
    //    uint particleIndex;
    //};

    void main() {
        uint index = gl_GlobalInvocationID.x;

        buckets[index * 2 + 0] = workData.numbers[index * 2 + 0];
        buckets[index * 2 + 1] = workData.numbers[index * 2 + 1];
    }
    `;

    const computeShader_bucketRef = `
    #version 450

    #define PARTICLE_NUM ${PARTICLE_NUM}
    #define PARTICLE_SIZE ${PARTICLE_SIZE}
    #define BUCKETIDX_NUM ${bucketIndexNum}

    layout(set = 0, binding = 0) buffer Particle {
        float particle[ PARTICLE_NUM * PARTICLE_SIZE ];
    } particle;

    layout(set = 0, binding = 1) buffer Velocity {
        float velocity[ PARTICLE_NUM * PARTICLE_SIZE ];
    } velocity;

    layout(set = 0, binding = 2) buffer BucketsData {
        uint buckets[ PARTICLE_NUM * 2 ];
    };

    layout(set = 0, binding = 3) buffer WorkData {
        uint numbers[ PARTICLE_NUM * 2 ];
    } workData;

    layout(set = 0, binding = 4) buffer BucketRef {
        uint bucketRef[ BUCKETIDX_NUM * 2 ];
    };

    layout(set = 0, binding = 5) buffer Neighbors {
        uint neighbors[ PARTICLE_NUM ];
    };

    //layout(set = 0, binding = 6) buffer ResultData {
    //    //uint numbers[ PARTICLE_NUM * 2 ];
    //    //uint numbers[ BUCKETIDX_NUM * 2 ]; // for bucketRef
    //    uint numbers[ PARTICLE_NUM ]; // for findNeeighbors
    //} resultData;

    layout(set = 0, binding = 7) uniform StepUniform {
        vec2 pq;
    };

    //layout(set = 0, binding = 8) uniform ParticleIndex {
    //    uint particleIndex;
    //};

    void main() {
        // dispatch PARTICLE_NUM + 1 ( 0 to PARTICLE_NUM)
        uint index = gl_GlobalInvocationID.x;
        
        if (index == 0) {
            uint refIndex_start = buckets[index * 2 + 0];
            bucketRef[refIndex_start * 2 + 0] = 0;
        } else if (index < PARTICLE_NUM) {
            uint refIndex_start = buckets[index * 2 + 0];
            uint refIndex_end   = buckets[(index - 1) * 2 + 0];
            if (refIndex_start != refIndex_end) {
                bucketRef[refIndex_start * 2 + 0] = index;
                bucketRef[refIndex_end   * 2 + 1] = index - 1;
            }
        } else {
            uint refIndex_end   = buckets[(index - 1) * 2 + 0];
            bucketRef[refIndex_end * 2 + 1] = index - 1;
        }
    }
    `;

    const computeShader_findNeighbors = `
    #version 450

    #define PARTICLE_NUM ${PARTICLE_NUM}
    #define PARTICLE_SIZE ${PARTICLE_SIZE}
    #define BUCKET_NUM ${bucketNum}
    #define BUCKET_SIZE ${bucketSize}
    #define BUCKETIDX_NUM ${bucketIndexNum}
    #define particleIndex ${particleIndex}

    layout(set = 0, binding = 0) buffer Particle {
        float particle[ PARTICLE_NUM * PARTICLE_SIZE ];
    } particle;

    layout(set = 0, binding = 1) buffer Velocity {
        float velocity[ PARTICLE_NUM * PARTICLE_SIZE ];
    } velocity;

    layout(set = 0, binding = 2) buffer BucketsData {
        uint buckets[ PARTICLE_NUM * 2 ];
    };

    layout(set = 0, binding = 3) buffer WorkData {
        uint numbers[ PARTICLE_NUM * 2 ];
    } workData;

    layout(set = 0, binding = 4) buffer BucketRef {
        uint bucketRef[ BUCKETIDX_NUM * 2 ];
    };

    layout(set = 0, binding = 5) buffer Neighbors {
        uint neighbors[ PARTICLE_NUM ];
    };

    //layout(set = 0, binding = 6) buffer ResultData {
    //    //uint numbers[ PARTICLE_NUM * 2 ];
    //    //uint numbers[ BUCKETIDX_NUM * 2 ]; // for bucketRef
    //    uint numbers[ PARTICLE_NUM ]; // for findNeeighbors
    //} resultData;

    layout(set = 0, binding = 7) uniform StepUniform {
        vec2 pq;
    };

    //layout(set = 0, binding = 8) uniform ParticleIndex {
    //    uint particleIndex;
    //};

    void findNeighbors(uint index, ivec2 bucketPosition, uint neighborFlag) {
        if (bucketPosition.x < 0 || bucketPosition.x >= BUCKET_NUM || bucketPosition.y < 0 || bucketPosition.y >= BUCKET_NUM) {
            return;
        }

        uint bucketIndex = bucketPosition.x + BUCKET_NUM * bucketPosition.y;
        
        uint cellStart = bucketRef[bucketIndex * 2 + 0];
        uint cellEnd   = bucketRef[bucketIndex * 2 + 1];
        if (cellStart == 65535 || cellEnd == 65535) {
            return;
        }

        for (uint i = cellStart; i <= cellEnd; i++) {
            if (buckets[i * 2 + 1] == index) {
                //neighbors[index] = buckets[i * 2 + 1];
                neighbors[index] = neighborFlag;
            }
        }
    }

    void main() {
        uint index = gl_GlobalInvocationID.x;

        uint pIndex = particleIndex;

        vec2 position = vec2(
            particle.particle[ pIndex * PARTICLE_SIZE + 0 ],
            particle.particle[ pIndex * PARTICLE_SIZE + 1 ]
        );

        vec2 bucketPosition = position / BUCKET_SIZE;

        int  xOffset = fract(bucketPosition.x) < 0.5 ? -1: 1;
        int  yOffset = fract(bucketPosition.y) < 0.5 ? -1: 1;

        ivec2 bucketPosition00 = ivec2(bucketPosition);
        ivec2 bucketPosition10 = bucketPosition00 + ivec2(xOffset, 0);
        ivec2 bucketPosition01 = bucketPosition00 + ivec2(0, yOffset);
        ivec2 bucketPosition11 = bucketPosition00 + ivec2(xOffset, yOffset);
        
        findNeighbors(index, bucketPosition00, 0);
        findNeighbors(index, bucketPosition10, 100);
        findNeighbors(index, bucketPosition01, 100);
        findNeighbors(index, bucketPosition11, 100);

    }
    `;

    const computeShaderRead = `
    #version 450

    #define PARTICLE_NUM ${PARTICLE_NUM}
    #define PARTICLE_SIZE ${PARTICLE_SIZE}
    #define BUCKETIDX_NUM ${bucketIndexNum}

    layout(set = 0, binding = 0) buffer Particle {
        float particle[ PARTICLE_NUM * PARTICLE_SIZE ];
    } particle;

    layout(set = 0, binding = 1) buffer Velocity {
        float velocity[ PARTICLE_NUM * PARTICLE_SIZE ];
    } velocity;

    layout(set = 0, binding = 2) buffer BucketsData {
        uint buckets[ PARTICLE_NUM * 2 ];
    };

    layout(set = 0, binding = 3) buffer WorkData {
        uint numbers[ PARTICLE_NUM * 2 ];
    } workData;

    layout(set = 0, binding = 4) buffer BucketRef {
        uint bucketRef[ BUCKETIDX_NUM * 2 ];
    };

    layout(set = 0, binding = 5) buffer Neighbors {
        uint neighbors[ PARTICLE_NUM ];
    };
    
    //layout(set = 0, binding = 6) buffer ResultData {
    //    //uint numbers[ PARTICLE_NUM * 2 ];
    //    //uint numbers[ BUCKETIDX_NUM * 2 ]; // for bucketRef
    //    uint numbers[ PARTICLE_NUM ]; // for findNeeighbors
    //} resultData;

    layout(set = 0, binding = 7) uniform StepUniform {
        vec2 pq;
    };

    //layout(set = 0, binding = 8) uniform ParticleIndex {
    //    uint particleIndex;
    //};

    void main() {
        uint index = gl_GlobalInvocationID.x;

        // for buckets
        //resultData.numbers[index * 2 + 0 ] = buckets[index * 2 + 0];
        //resultData.numbers[index * 2 + 1 ] = buckets[index * 2 + 1];

        // for bucketRef
        //resultData.numbers[index * 2 + 0 ] = bucketRef[index * 2 + 0];
        //resultData.numbers[index * 2 + 1 ] = bucketRef[index * 2 + 1];

        // for neighbors
        //resultData.numbers[index] = neighbors[index];
    }
    `;

    const computeShader_integrate = `
    #version 450
    #define PARTICLE_NUM ${PARTICLE_NUM}
    #define PARTICLE_SIZE ${PARTICLE_SIZE}
    #define BUCKETIDX_NUM ${bucketIndexNum}

    layout(set = 0, binding = 0) buffer Particle {
        float particle[ PARTICLE_NUM * PARTICLE_SIZE ];
    } particle;

    layout(set = 0, binding = 1) buffer Velocity {
        float velocity[ PARTICLE_NUM * PARTICLE_SIZE ];
    } velocity;

    layout(set = 0, binding = 2) buffer BucketsData {
        uint buckets[ PARTICLE_NUM * 2 ];
    };

    layout(set = 0, binding = 3) buffer WorkData {
        uint numbers[ PARTICLE_NUM * 2 ];
    } workData;

    layout(set = 0, binding = 4) buffer BucketRef {
        uint bucketRef[ BUCKETIDX_NUM * 2 ];
    };

    layout(set = 0, binding = 5) buffer Neighbors {
        uint neighbors[ PARTICLE_NUM ];
    };

    //layout(set = 0, binding = 6) buffer ResultData {
    //    //uint numbers[ PARTICLE_NUM * 2 ];
    //    uint numbers[ PARTICLE_NUM ]; // for findNeeighbors
    //} resultData;

    layout(set = 0, binding = 7) uniform StepUniform {
        vec2 pq;
    };

    //layout(set = 0, binding = 8) uniform ParticleIndex {
    //    uint particleIndex;
    //};

    void main() {
        uint index = gl_GlobalInvocationID.x;
        if ( index >= PARTICLE_NUM ) { return; }

        vec3 position = vec3(
            particle.particle[ index * PARTICLE_SIZE + 0 ] + velocity.velocity[ index * PARTICLE_SIZE + 0 ],
            particle.particle[ index * PARTICLE_SIZE + 1 ] + velocity.velocity[ index * PARTICLE_SIZE + 1 ],
            particle.particle[ index * PARTICLE_SIZE + 2 ] + velocity.velocity[ index * PARTICLE_SIZE + 2 ]
            //    //    particle.particle[ index * PARTICLE_SIZE + 0 ],
            //    //    particle.particle[ index * PARTICLE_SIZE + 1 ],
            //    //    particle.particle[ index * PARTICLE_SIZE + 2 ]
        );
        //vec3 position =
            //    particle.particle[ index ] + velocity.velocity[ index ];
            
        if ( position.x <= 0.001 || position.x >= 0.999 ) {
            velocity.velocity[ index * PARTICLE_SIZE + 0 ] = - velocity.velocity[ index * PARTICLE_SIZE + 0 ];
            //velocity.velocity[ index ].x = - velocity.velocity[ index ].x;
        }

        //if ( abs( position.y ) >= ROOM_SIZE ) {
        if ( position.y <= 0.001 || position.y >= 0.999 ) {
            velocity.velocity[ index * PARTICLE_SIZE + 1 ] = - velocity.velocity[ index * PARTICLE_SIZE + 1 ];
            //velocity.velocity[ index ].y = - velocity.velocity[ index ].y;
        }

        //if ( abs( position.z ) >= ROOM_SIZE ) {
        if ( position.z <= 0.001 || position.z >= 0.999 ) {
            velocity.velocity[ index * PARTICLE_SIZE + 2 ] = - velocity.velocity[ index * PARTICLE_SIZE + 2 ];
            //velocity.velocity[ index ].z = - velocity.velocity[ index ].z;
        }

        particle.particle[ index * PARTICLE_SIZE + 0 ] = position.x;
        particle.particle[ index * PARTICLE_SIZE + 1 ] = position.y;
        particle.particle[ index * PARTICLE_SIZE + 2 ] = position.z;
        //particle.particle[ index ] = position;
    }
    `;

    // Bind group layout and bind group
    const bindGroupLayout = device.createBindGroupLayout({
        entries: [
        {
            binding: 0,
            visibility: GPUShaderStage.COMPUTE,
            type: "storage-buffer"
        },
        {
            binding: 1,
            visibility: GPUShaderStage.COMPUTE,
            type: "storage-buffer"
        },
        {
            binding: 2,
            visibility: GPUShaderStage.COMPUTE,
            type: "storage-buffer"
        },
        {
            binding: 3,
            visibility: GPUShaderStage.COMPUTE,
            type: "storage-buffer"
        },
        {
            binding: 4,
            visibility: GPUShaderStage.COMPUTE,
            type: "storage-buffer"
        },
        {
            binding: 5,
            visibility: GPUShaderStage.COMPUTE,
            type: "storage-buffer"
        },
        //{
        //    binding: 6,
        //    visibility: GPUShaderStage.COMPUTE,
        //    type: "storage-buffer"
        //},
        {
            binding: 7,
            visibility: GPUShaderStage.COMPUTE,
            type: "uniform-buffer"
        }
        //{
        //    binding: 8,
        //    visibility: GPUShaderStage.COMPUTE,
        //    type: "uniform-buffer"
        //}
        ]
    });

    const bindGroup = device.createBindGroup({
        layout: bindGroupLayout,
        entries: [
        {
            binding: 0,
            resource: {
                buffer: positionBuffer
            }
        },
        {
            binding: 1,
            resource: {
                buffer: velocityBuffer
            }
        },
        {
            binding: 2,
            resource: {
                buffer: bucketsBuffer
            }
        },
        {
            binding: 3,
            resource: {
                buffer: workBuffer
            }
        },
        {
            binding: 4,
            resource: {
                buffer: bucketRefBuffer
            }
        },
        {
            binding: 5,
            resource: {
                buffer: neighborsBuffer
            }
        },
        //{
        //    binding: 6,
        //    resource: {
        //        buffer: resultBuffer
        //    }
        //},
        {
            binding: 7,
            resource: {
                buffer: uniformBuffer
            }
        }
        //{
        //    binding: 8,
        //    resource: {
        //        buffer: particleUniformBuffer
        //    }
        //}
        ]
    });
    
    // Pipeline setup
    const computePipeline_InitializeBackets = device.createComputePipeline({
        layout: device.createPipelineLayout({
            bindGroupLayouts: [bindGroupLayout]
        }),
        computeStage: {
            module: device.createShaderModule({
                code: glslang.compileGLSL(computeShader_initialize_buckets, "compute")
            }),
            entryPoint: "main"
        }
    });

    const computePipeline_Sort = device.createComputePipeline({
        layout: device.createPipelineLayout({
            bindGroupLayouts: [bindGroupLayout]
        }),
        computeStage: {
            module: device.createShaderModule({
                code: glslang.compileGLSL(computeShaderSort, "compute")
            }),
            entryPoint: "main"
        }
    });

    const computePipeline_Work = device.createComputePipeline({
        layout: device.createPipelineLayout({
            bindGroupLayouts: [bindGroupLayout]
        }),
        computeStage: {
            module: device.createShaderModule({
                code: glslang.compileGLSL(computeShaderWork, "compute")
            }),
            entryPoint: "main"
        }
    });

    const computePipeline_bucketRef = device.createComputePipeline({
        layout: device.createPipelineLayout({
            bindGroupLayouts: [bindGroupLayout]
        }),
        computeStage: {
            module: device.createShaderModule({
                code: glslang.compileGLSL(computeShader_bucketRef, "compute")
            }),
            entryPoint: "main"
        }
    });

    const computePipeline_findNeighbors = device.createComputePipeline({
        layout: device.createPipelineLayout({
            bindGroupLayouts: [bindGroupLayout]
        }),
        computeStage: {
            module: device.createShaderModule({
                code: glslang.compileGLSL(computeShader_findNeighbors, "compute")
            }),
            entryPoint: "main"
        }
    });

    const computePipeline_integrate = device.createComputePipeline({
        layout: device.createPipelineLayout({
            bindGroupLayouts: [bindGroupLayout]
        }),
        computeStage: {
            module: device.createShaderModule({
                code: glslang.compileGLSL(computeShader_integrate, "compute")
            }),
            entryPoint: "main"
        }
    });

    const computePipeline_Read = device.createComputePipeline({
        layout: device.createPipelineLayout({
            bindGroupLayouts: [bindGroupLayout]
        }),
        computeStage: {
            module: device.createShaderModule({
                code: glslang.compileGLSL(computeShaderRead, "compute")
            }),
            entryPoint: "main"
        }
    });

    const vs = `
    #version 450

    layout(location=0) in vec3 vertexPosition;
    layout(location=1) in vec3 instancePosition;
    //
    layout(location=2) in uint neighbor;

    layout(location = 0) out vec3 vColor;

    const float scale = ${POINT_SIZE};

    void main() {
        mat4 scaleMTX = mat4(
            scale, 0, 0, 0,
            0, scale , 0, 0,
            0, 0, scale, 0,
            instancePosition, 1
        );

        uint id = gl_InstanceIndex;
        if (id == 0) {
            vColor = vec3(1.0, 0.0, 0.0);
        } else if (neighbor == 0) {
            vColor = vec3(1.0, 1.0, 0.0);
        } else if (neighbor == 100) {
            vColor = vec3(0.0, 1.0, 0.0);
        } else {
            vColor = vec3(0.7, 0.7, 0.7);
        }

        gl_Position = scaleMTX * vec4(vertexPosition, 1.0);
    } 
    `;

    const fs = `
    #version 450

    layout(location=0) in vec3 vColor;

    layout(location=0) out vec4 fragColor;

    void main() {
        fragColor = vec4(vColor, 1.0);
    } 
    `;

    const cubeData = utils.createCube();
    const numVertices = cubeData.positions.length / 3;

    const vertexBuffer = device.createBuffer({
        size: cubeData.positions.byteLength,
        usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST
    });
    device.defaultQueue.writeBuffer(vertexBuffer, 0, cubeData.positions);
    
    const pipeline = device.createRenderPipeline({
        layout: device.createPipelineLayout({bindGroupLayouts: []}),
        vertexStage: {
            module: device.createShaderModule({
                code: glslang.compileGLSL(vs, "vertex")
            }),
            entryPoint: "main"
        },
        fragmentStage: {
            module: device.createShaderModule({
                code: glslang.compileGLSL(fs, "fragment")
            }),
            entryPoint: "main"
        },
        primitiveTopology: "triangle-list",
        vertexState: {
            vertexBuffers: [
                {
                    arrayStride: 12,
                    attributes: [{
                        shaderLocation: 0,
                        format: "float3",
                        offset: 0
                    }]
                },
                // instancePosition
                {
                    arrayStride: 12,
                    stepMode: "instance",
                    attributes: [{
                        shaderLocation: 1,
                        format: "float3",
                        offset: 0
                    }]
                },
                // neighbor
                {
                    arrayStride: 4,
                    stepMode: "instance",
                    attributes: [{
                        shaderLocation: 2,
                        format: "uint",
                        offset: 0
                    }]
                },
            ]
        },
        colorStates: [{
            format: swapChainFormat
        }]
    });


    // GPU buffer for reading resultBUffer
    //const gpuReadBuffer = device.createBuffer({
    //    size: resultBufferSize,
    //    usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
    //});

    requestAnimationFrame(function draw() {

        // clear neighbors
        for (let i = 0 ; i < PARTICLE_NUM ; i++){
            neighbors[i] = 65535;
        }
        device.defaultQueue.writeBuffer(neighborsBuffer, 0, neighbors);

    // Commands submission
    // Initialize Buckets
    var commandEncoder = device.createCommandEncoder();
    var passEncoder = commandEncoder.beginComputePass();

    passEncoder.setPipeline(computePipeline_InitializeBackets);
    passEncoder.setBindGroup(0, bindGroup);
    passEncoder.dispatch(dataNum);
    
    passEncoder.endPass();
    device.defaultQueue.submit([commandEncoder.finish()]);

    // Sort Buckets (p, q)
    for (let p = 0; p < powerN; p++) { // dataNum -> powerN
        for (let q = 0; q <= p; q++) {
            Sort(p, q);
        }
    }
    // end of Sort

    // bucketRef Buffer
    var commandEncoder = device.createCommandEncoder();
    var passEncoder = commandEncoder.beginComputePass();

    passEncoder.setPipeline(computePipeline_bucketRef);
    passEncoder.setBindGroup(0, bindGroup);
    passEncoder.dispatch(dataNum + 1);
    
    passEncoder.endPass();
    device.defaultQueue.submit([commandEncoder.finish()]);

    // find neighbors
    // particleUniformBuffer set value
    //const particleIndexData = new Uint32Array([0]);
    //device.defaultQueue.writeBuffer(particleUniformBuffer, 0, particleIndexData);

    var commandEncoder = device.createCommandEncoder();
    var passEncoder = commandEncoder.beginComputePass();

    passEncoder.setPipeline(computePipeline_findNeighbors);
    passEncoder.setBindGroup(0, bindGroup);
    passEncoder.dispatch(dataNum);
    
    passEncoder.endPass();
    device.defaultQueue.submit([commandEncoder.finish()]);

    // Read Buffer
    var commandEncoder = device.createCommandEncoder();
    var passEncoder = commandEncoder.beginComputePass();

    //passEncoder.setPipeline(computePipeline_Read);
    //passEncoder.setBindGroup(0, bindGroup);
    //passEncoder.dispatch(dataNum);
    //passEncoder.dispatch(bucketIndexNum); // for bucketRef

    passEncoder.setPipeline(computePipeline_integrate);
    passEncoder.setBindGroup(0, bindGroup);
    passEncoder.dispatch(PARTICLE_NUM);
    
    passEncoder.endPass();

    // Encode commands for copying buffer to buffer.
    //commandEncoder.copyBufferToBuffer( resultBuffer, 0, gpuReadBuffer, 0, resultBufferSize);

    // Submit GPU commands.
    //device.defaultQueue.submit([commandEncoder.finish()]);

    //
    const textureView = swapChain.getCurrentTexture().createView();

    const renderPass = commandEncoder.beginRenderPass({
        colorAttachments: [{
            attachment: textureView,
            loadValue: [0, 0, 0, 1]
        }]
    });

    renderPass.setPipeline(pipeline);
    renderPass.setVertexBuffer(0, vertexBuffer);
    renderPass.setVertexBuffer(1, positionBuffer);
    renderPass.setVertexBuffer(2, neighborsBuffer);
    //renderPass.setViewport(0, 0, canvas.width, canvas.height, 0, 1);
    renderPass.draw(numVertices, PARTICLE_NUM, 0, 0);

    renderPass.endPass();

    device.defaultQueue.submit([commandEncoder.finish()]);

    // Read buffer.
    //await gpuReadBuffer.mapAsync(GPUMapMode.READ);
    //const arrayBuffer = gpuReadBuffer.getMappedRange();
    //
    //const arrayData = new Uint32Array(arrayBuffer);
    //var arrayData2 = [];
    //for (let i = 0; i < bucketIndexNum; i++) {
    //    arrayData2.push([arrayData[i * 2 + 0], arrayData[i * 2 + 1]]);
    //}
    //console.log("resultData: ", new Uint32Array(arrayBuffer));
    //console.log("resultData: ", arrayData);
    //console.log("reultData2: ", arrayData2);

    requestAnimationFrame(draw);
    });

    // Sort function
    function Sort(p, q) {
        var commandEncoder = device.createCommandEncoder();
        var passEncoder = commandEncoder.beginComputePass();
    
        var pq = new Float32Array([p, q]);
        device.defaultQueue.writeBuffer(uniformBuffer, 0, pq);

        passEncoder.setPipeline(computePipeline_Sort);
        passEncoder.setBindGroup(0, bindGroup);
        passEncoder.dispatch(dataNum);

        passEncoder.setPipeline(computePipeline_Work);
        passEncoder.setBindGroup(0, bindGroup);
        passEncoder.dispatch(dataNum);

        passEncoder.endPass();
        device.defaultQueue.submit([commandEncoder.finish()]);
    }
})();
</script>
</body>
</html>