Files
camera-extrinsic-play/src/App.tsx

470 lines
16 KiB
TypeScript

import { Grid, useBVH, useGLTF, CameraControls, AccumulativeShadows, OrbitControls, Stats } from '@react-three/drei'
import { Camera, Canvas, useFrame, useThree, useLoader, RenderCallback, RootState } from '@react-three/fiber'
import * as THREE from 'three'
import { FontLoader } from 'three/addons/loaders/FontLoader.js'
import { TextGeometry } from 'three/addons/geometries/TextGeometry.js'
import HelvetikerRegular from "three/examples/fonts/helvetiker_regular.typeface.json"
import { useEffect, useRef, useState, JSX } from 'react'
// import POSE_3D_ from "./assets/result_ae_01_ae_08.json"
import POSE_3D_ from "./assets/temp_result.json"
import POSE_3D_MANY_ from "./assets/many_people_all_3d_pose.json"
import POSE_3D_04_02_ from "./assets/res.json"
// One pose: a list of [x, y, z] keypoints. Per the original note the expected
// shape is (133, 3); the type itself does not enforce the keypoint count.
type PosePoints3D = [number, number, number][]
// An animation: one pose per frame, i.e. shape (F, 133, 3)
type AnimePosePoints3D = PosePoints3D[]
// Shape of res.json: two single-frame poses stored under the keys "a" and "b"
interface Skeleton0402 {
"a": PosePoints3D
"b": PosePoints3D
}
// NOTE(review): the casts below are unchecked; the JSON contents are assumed
// to match these shapes — confirm against the asset files.
// POSE_3D and POSE_3D_MANY are only referenced from commented-out code in this file.
const POSE_3D = POSE_3D_ as AnimePosePoints3D
const POSE_3D_MANY = POSE_3D_MANY_ as AnimePosePoints3D[] // N F 133 3
const POSE_3D_04_02 = POSE_3D_04_02_ as Skeleton0402
// Groups the three.js addon classes imported above under one object;
// the camera label geometry is built via THREE_ADDONS.TextGeometry.
const THREE_ADDONS = {
FontLoader,
TextGeometry,
} as const
// OpenCV to OpenGL camera-axis conversion matrix (used by preProcessExtrinsic)
// OpenCV: X right, Y down, Z forward
// OpenGL: X right, Y up, Z backward
// i.e. keep X and negate both Y and Z: diag(1, -1, -1, 1)
const CV_TO_GL_MAT = new THREE.Matrix4().set(
1, 0, 0, 0,
0, -1, 0, 0,
0, 0, -1, 0,
0, 0, 0, 1
)
// World-axis conversion matrix applied to camera extrinsics in preProcessExtrinsic.
// As written it maps (x, y, z) -> (-x, -z, -y): X is negated and Y/Z are
// swapped with a sign flip.
// NOTE(review): this is NOT a plain -90 degree rotation about the X axis (that
// would map (x, y, z) -> (x, z, -y)); confirm the matrix matches the intended
// Z-up to Y-up convention of the calibration data.
const Z_UP_TO_Y_UP = new THREE.Matrix4().set(
-1, 0, 0, 0,
0, 0, -1, 0,
0, -1, 0, 0,
0, 0, 0, 1
)
// Axis conversion applied to skeleton joints (see transformJointPosition).
// Swaps the Y and Z components: (x, y, z) -> (x, z, y).
// NOTE(review): a pure axis swap has determinant -1 (it mirrors handedness);
// confirm this is intended alongside Z_UP_TO_Y_UP used for the cameras.
const Z_UP_TO_Y_UP_PRIME = new THREE.Matrix4().set(
1, 0, 0, 0,
0, 0, 1, 0,
0, 1, 0, 0,
0, 0, 0, 1
)
// Color definitions for different body parts (RGB components normalised to 0..1)
const COLOR_SPINE = new THREE.Color(138 / 255, 201 / 255, 38 / 255) // green, torso bones & fallback joint color
const COLOR_ARMS = new THREE.Color(255 / 255, 202 / 255, 58 / 255) // yellow, arms & shoulders
const COLOR_LEGS = new THREE.Color(25 / 255, 130 / 255, 196 / 255) // blue, legs & hips
const COLOR_FINGERS = new THREE.Color(255 / 255, 0, 0) // red, fingers
const COLOR_FACE = new THREE.Color(255 / 255, 200 / 255, 0) // amber, face
const COLOR_FOOT = new THREE.Color(255 / 255, 128 / 255, 0) // orange, foot
const COLOR_HEAD = new THREE.Color(255 / 255, 0, 255 / 255) // magenta, head
// Body bone connections as [start, end] keypoint-index pairs.
// The order must stay aligned with BODY_BONE_COLORS, which is indexed by bone position.
const BODY_BONES = [
// legs
[15, 13], [13, 11], [16, 14], [14, 12], [11, 12], // legs (the last pair links the two hips)
[5, 11], [6, 12], [5, 6], // torso
[5, 7], [7, 9], [6, 8], [8, 10], // arms
[1, 2], [0, 1], [0, 2], [1, 3], [2, 4], // head
[15, 17], [15, 18], [15, 19], // left foot
[16, 20], [16, 21], [16, 22], // right foot
] as const
// Body bone colors: entry i is the color of BODY_BONES[i]
// (5 leg, 3 torso, 4 arm, 5 head, 3 + 3 foot bones, in that order).
const BODY_BONE_COLORS = [
COLOR_LEGS, COLOR_LEGS, COLOR_LEGS, COLOR_LEGS, COLOR_LEGS,
COLOR_SPINE, COLOR_SPINE, COLOR_SPINE,
COLOR_ARMS, COLOR_ARMS, COLOR_ARMS, COLOR_ARMS,
COLOR_HEAD, COLOR_HEAD, COLOR_HEAD, COLOR_HEAD, COLOR_HEAD,
COLOR_FOOT, COLOR_FOOT, COLOR_FOOT,
COLOR_FOOT, COLOR_FOOT, COLOR_FOOT,
] as const
// Hand bone connections (in pairs of [start, end] indices).
// Each hand has a root keypoint (91 or 112) from which five 4-bone finger chains start.
// NOTE(review): if the keypoints follow COCO-WholeBody ordering, 91-111 is the
// LEFT hand and 112-132 the RIGHT hand, i.e. the labels below would be swapped —
// confirm against the pose estimator's output format. All hand bones share one
// color, so rendering is unaffected either way.
const HAND_BONES = [
// right hand
[91, 92], [92, 93], [93, 94], [94, 95], // right thumb
[91, 96], [96, 97], [97, 98], [98, 99], // right index
[91, 100], [100, 101], [101, 102], [102, 103], // right middle
[91, 104], [104, 105], [105, 106], [106, 107], // right ring
[91, 108], [108, 109], [109, 110], [110, 111], // right pinky
// left hand
[112, 113], [113, 114], [114, 115], [115, 116], // left thumb
[112, 117], [117, 118], [118, 119], [119, 120], // left index
[112, 121], [121, 122], [122, 123], [123, 124], // left middle
[112, 125], [125, 126], [126, 127], [127, 128], // left ring
[112, 129], [129, 130], [130, 131], [131, 132] // left pinky
] as const
// 4x4 identity matrix as a flat list of 16 numbers.
// Not referenced by any code visible in this file.
const DEFAULT_TRANSFORMATION_MATRIX = [
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
0, 0, 0, 1,
] as const
// Fallback near/far plane distances for the visualised camera frustums
// (used by CameraViewFromExtrinsic when the props are omitted).
const DEFAULT_NEAR = 0.05
const DEFAULT_FAR = 1
// Per-camera extrinsic matrices, keyed by camera name.
// Each value is a flat, row-major 4x4 matrix (16 numbers) that is handed to
// THREE.Matrix4.set; the last row is always [0, 0, 0, 1].
// NOTE(review): presumably these are OpenCV-convention world-to-camera [R|t]
// matrices (preProcessExtrinsic inverts them) — confirm with the calibration source.
// The arrays are intentionally NOT `as const`: a readonly tuple is not
// assignable to the mutable `number[]` value type declared here.
const CAMERA_EXTRINSIC_MATRIX_MAP: Record<string, number[]> = {
  "AE_01": [
    0.37408302, -0.91907411, 0.12395429, 1.18976111,
    0.17243349, -0.06239751, -0.98304285, -0.06429779,
    0.91122367, 0.38911351, 0.13513731, 2.51940833,
    0, 0, 0, 1,
  ],
  "AE_1A": [
    0.92998171, -0.36696694, -0.02166301, 2.21643671,
    -0.05110403, -0.07070226, -0.99618752, -0.72948697,
    0.36403626, 0.92754324, -0.0845053, 6.45800206,
    0, 0, 0, 1,
  ],
  "AE_08": [
    0.98195914, -0.18888337, -0.00890642, 1.43011854,
    -0.02247979, -0.06984105, -0.99730481, -0.61678831,
    0.18775226, 0.97951279, -0.07282712, 5.81983825,
    0, 0, 0, 1,
  ],
}
// Per-camera intrinsic matrices, keyed by the same camera names as
// CAMERA_EXTRINSIC_MATRIX_MAP. Each value is a flat, row-major 3x3 matrix:
//   fx  0  cx
//   0   fy cy
//   0   0  1
// (intrinsicToFov reads fx from index 0 and fy from index 4).
// The arrays are intentionally NOT `as const`: a readonly tuple is not
// assignable to the mutable `number[]` value type declared here.
const CAMERA_INTRINSIC_MATRIX_MAP: Record<string, number[]> = {
  "AE_01": [
    1806.82137617, 0, 1230.53175624,
    0, 1809.75580378, 766.36204406,
    0, 0, 1,
  ],
  "AE_1A": [
    3467.39715751, 0, 1000.62548655,
    0, 3473.7168112, 831.64048503,
    0, 0, 1,
  ],
  "AE_08": [
    2785.43931794, 0, 1254.98272372,
    0, 2788.10437965, 738.82985324,
    0, 0, 1,
  ],
}
// Image size passed to intrinsicToFov together with the intrinsics, and used
// for the aspect ratio of the visualised camera frustums.
// NOTE(review): presumably the capture resolution in pixels, shared by all cameras — confirm.
const IMAGE_WIDTH = 2560
const IMAGE_HEIGHT = 1440
/**
 * Derives the horizontal and vertical field of view from a pinhole intrinsic matrix.
 *
 * @param intrinsic Flat, row-major 3x3 intrinsic matrix (9 numbers); `fx` is
 *   read from index 0 and `fy` from index 4. A wrong length is reported via
 *   `console.assert` but does not throw.
 * @param image_size Image dimensions, in the same units as `fx`/`fy`.
 * @returns `fov_x` (horizontal) and `fov_y` (vertical), both in degrees.
 */
const intrinsicToFov = (intrinsic: readonly number[], image_size: { width: number, height: number }) => {
  console.assert(intrinsic.length === 9, "intrinsic must be a 3x3 matrix")
  const fx = intrinsic[0]
  const fy = intrinsic[4]
  const RAD_TO_DEG = 180 / Math.PI
  // Full angle subtended by the image extent at focal length f: 2 * atan(extent / (2 * f))
  const fov_x = 2 * Math.atan(image_size.width / (2 * fx)) * RAD_TO_DEG
  const fov_y = 2 * Math.atan(image_size.height / (2 * fy)) * RAD_TO_DEG
  return { fov_x, fov_y }
}
const Scene = () => {
// Flat 15 x 15 ground plane: rotated to lie horizontally, lowered by 0.5 and
// set to receive shadows. Currently only referenced from commented-out JSX.
const Floor = () => (
  <mesh rotation-x={-Math.PI / 2} position-y={-0.5} receiveShadow>
    <planeGeometry args={[15, 15]} />
    <meshStandardMaterial color="#ccc" />
  </mesh>
)
// World-origin axes gizmo with an axis length of 15.
const Axes = () => <axesHelper args={[15]} />
// Props for CameraViewFromExtrinsic.
interface CameraViewFromExtrinsicProps {
// Camera pose: either a THREE.Matrix4 or 16 numbers that are passed to Matrix4.set
extrinsic: number[] | THREE.Matrix4
// Frustum aspect ratio; defaults to 4 / 3
aspect?: number
// Label drawn at the camera position; no label is rendered when omitted or empty
name?: string
// Near plane distance; defaults to DEFAULT_NEAR
near?: number
// Far plane distance; defaults to DEFAULT_FAR
far?: number
// Forwarded as the PerspectiveCamera `fov` argument; defaults to 60
fov?: number
// Size of the label text; defaults to 0.1
textSize?: number
}
// https://threejs.org/docs/#examples/en/loaders/FontLoader
// https://www.ilyameerovich.com/simple-3d-text-meshes-in-three-js/
// Draws one camera as a frustum (PerspectiveCamera + CameraHelper) placed by
// the given pose matrix, with an optional text label that always turns to
// face the viewing camera.
const CameraViewFromExtrinsic = ({ extrinsic, name, near, far, fov, textSize, aspect }: CameraViewFromExtrinsicProps) => {
  // Normalise the pose prop into a Matrix4 (a Matrix4 instance is used as-is).
  const resolvePose = (): THREE.Matrix4 => {
    if (extrinsic instanceof THREE.Matrix4) {
      return extrinsic
    }
    if (!Array.isArray(extrinsic)) {
      throw new Error("extrinsic must be a 4x4 matrix or an array of 16 elements")
    }
    console.assert(extrinsic.length === 16, "extrinsic must be a 4x4 matrix")
    const parsed = new THREE.Matrix4()
    // @ts-expect-error 16 elements
    parsed.set(...extrinsic)
    return parsed
  }
  const Rt = resolvePose()

  const font = new FontLoader().parse(HelvetikerRegular)

  // The frustum camera is only visualised; it is never used to render the scene.
  const frustumCamera = new THREE.PerspectiveCamera(
    fov ?? 60,
    aspect ?? 4 / 3,
    near ?? DEFAULT_NEAR,
    far ?? DEFAULT_FAR,
  )
  frustumCamera.applyMatrix4(Rt)

  // Billboard the label: re-orient it towards the viewer on every frame.
  const labelRef = useRef<THREE.Mesh>(null)
  const { camera: viewCamera } = useThree()
  useFrame(() => {
    labelRef.current?.lookAt(viewCamera.position)
  })

  const buildLabel = (): JSX.Element | null => {
    if (!name) {
      return null
    }
    const labelGeometry = new THREE_ADDONS.TextGeometry(name, { font, size: textSize ?? 0.1, depth: 0.001 })
    // Anchor the label at the translation part of the pose matrix.
    const anchor = new THREE.Vector3().setFromMatrixPosition(Rt)
    return (
      <mesh ref={labelRef} position={anchor}>
        <primitive object={labelGeometry} />
        <meshStandardMaterial color="black" />
      </mesh>
    )
  }

  return (
    <group>
      {buildLabel()}
      <primitive object={frustumCamera} />
      <cameraHelper args={[frustumCamera]} />
    </group>
  )
}
// Converts a calibration extrinsic into the pose matrix used to place a
// camera in the scene. Accepts a THREE.Matrix4 or 16 numbers (passed to
// Matrix4.set); returns a new Matrix4 and leaves the input untouched.
const preProcessExtrinsic = (extrinsic: number[] | THREE.Matrix4) => {
  const toMatrix4 = (): THREE.Matrix4 => {
    if (extrinsic instanceof THREE.Matrix4) {
      return extrinsic
    }
    if (!Array.isArray(extrinsic)) {
      throw new Error("extrinsic must be a 4x4 matrix or an array of 16 elements")
    }
    console.assert(extrinsic.length === 16, "extrinsic must be a 4x4 matrix")
    const parsed = new THREE.Matrix4()
    // @ts-expect-error 16 elements
    parsed.set(...extrinsic)
    return parsed
  }
  const Rt = toMatrix4()
  // Composite = CV_TO_GL_MAT * Rt * Z_UP_TO_Y_UP, read right to left:
  //   1. convert world axes (Z_UP_TO_Y_UP)
  //   2. apply the camera transform (Rt)
  //   3. convert camera axes from OpenCV to OpenGL (CV_TO_GL_MAT)
  // The product is then inverted to obtain the camera-to-world transform.
  return CV_TO_GL_MAT.clone()
    .multiply(Rt)
    .multiply(Z_UP_TO_Y_UP)
    .invert()
}
// Props for Human3DSkeleton.
interface Human3DSkeletonProps {
// Animation to play: one pose (list of [x, y, z] keypoints) per frame
skeleton: AnimePosePoints3D
// Initial value of the frame counter; defaults to 0
startFrame?: number
// Radius of each joint sphere; defaults to 0.01
jointRadius?: number
// Radius of each bone cylinder; defaults to 0.005
boneRadius?: number
// Whether joint spheres are rendered; defaults to true
showJoints?: boolean
// Whether bone cylinders are rendered; defaults to true
showBones?: boolean
// Playback speed: the frame counter advances by frameRate * delta on every
// render-loop tick; defaults to 30
frameRate?: number
}
// Renders an animated whole-body skeleton: one sphere per keypoint and one
// cylinder per bone listed in BODY_BONES / HAND_BONES, looping through the
// frames of `skeleton` at `frameRate` frames per second.
// Renders nothing when the skeleton has no frames or the current frame is missing.
const Human3DSkeleton = ({
  skeleton,
  startFrame = 0,
  jointRadius = 0.01,
  boneRadius = 0.005,
  showJoints = true,
  showBones = true,
  frameRate = 30
}: Human3DSkeletonProps) => {
  // Fractional frame counter; the displayed frame is its floor.
  const [frameIndex, setFrameIndex] = useState(startFrame)
  const totalFrames = skeleton.length

  const onFrame: RenderCallback = (_state, delta) => {
    // With zero or one frame there is nothing to animate, so skip the state
    // update entirely instead of re-rendering the component on every tick.
    if (totalFrames <= 1) return
    setFrameIndex(prevFrame => {
      // Advance proportionally to elapsed time so playback speed does not
      // depend on the render rate, and loop back to the start at the end.
      const nextFrame = prevFrame + frameRate * delta
      return nextFrame >= totalFrames ? 0 : nextFrame
    })
  }
  // Hooks must run unconditionally, so the empty-skeleton bail-out comes after this call.
  useFrame(onFrame)

  // Wrap the frame counter into [0, totalFrames); the double modulo also
  // handles a negative startFrame. For an empty skeleton this is NaN and the
  // lookup below yields undefined.
  const currentFrame = ((Math.floor(frameIndex) % totalFrames) + totalFrames) % totalFrames
  const frameJoints: PosePoints3D | undefined = skeleton[currentFrame]
  if (!frameJoints) return null
  const joints: PosePoints3D = frameJoints

  // Picks the color for a keypoint index based on the body region it belongs to.
  const getJointColor = (idx: number) => {
    // Face joints (23-90)
    if (idx >= 23 && idx <= 90) return COLOR_FACE
    // Hand joints (91-132)
    if (idx >= 91 && idx <= 132) return COLOR_FINGERS
    // Foot joints (17-22)
    if (idx >= 17 && idx <= 22) return COLOR_FOOT
    // Head (0-4)
    if (idx <= 4) return COLOR_HEAD
    // Arms (5-10)
    if (idx >= 5 && idx <= 10) return COLOR_ARMS
    // Legs (11-16)
    if (idx >= 11 && idx <= 16) return COLOR_LEGS
    // Default
    return COLOR_SPINE
  }

  // Converts a raw keypoint into scene coordinates. applyMatrix4 does not
  // modify the matrix, so the shared constant is used directly.
  const transformJointPosition = ([x, y, z]: [number, number, number]) =>
    new THREE.Vector3(x, y, z).applyMatrix4(Z_UP_TO_Y_UP_PRIME)

  // Builds the cylinder mesh for one bone, or null when either endpoint index
  // is outside the current frame's keypoint list.
  const renderBone = (key: string, startIdx: number, endIdx: number, color: THREE.Color) => {
    if (startIdx >= joints.length || endIdx >= joints.length) return null
    const startPos = transformJointPosition(joints[startIdx])
    const endPos = transformJointPosition(joints[endIdx])
    // The cylinder is centred on the bone's midpoint and spans its length.
    const midpoint = new THREE.Vector3().addVectors(startPos, endPos).multiplyScalar(0.5)
    const length = startPos.distanceTo(endPos)
    // Cylinders are created along +Y, so rotate +Y onto the bone direction.
    const direction = new THREE.Vector3().subVectors(endPos, startPos).normalize()
    const quaternion = new THREE.Quaternion().setFromUnitVectors(new THREE.Vector3(0, 1, 0), direction)
    return (
      <mesh key={key} position={midpoint} quaternion={quaternion}>
        <cylinderGeometry args={[boneRadius, boneRadius, length, 8]} />
        <meshStandardMaterial color={color} />
      </mesh>
    )
  }

  // Create the joint spheres
  const jointMeshes = showJoints ? joints.map((j, idx) => (
    <mesh key={`joint-${idx}`} position={transformJointPosition(j)}>
      <sphereGeometry args={[jointRadius, 16, 16]} />
      <meshStandardMaterial color={getJointColor(idx)} />
    </mesh>
  )) : null

  // Create the bone cylinders: body bones use their per-bone color table,
  // hand bones all share the finger color.
  const boneMeshes = showBones ? (
    <>
      {BODY_BONES.map(([startIdx, endIdx], idx) =>
        renderBone(`bone-body-${idx}`, startIdx, endIdx, BODY_BONE_COLORS[idx]))}
      {HAND_BONES.map(([startIdx, endIdx], idx) =>
        renderBone(`bone-hand-${idx}`, startIdx, endIdx, COLOR_FINGERS))}
    </>
  ) : null

  return (
    <group>
      {jointMeshes}
      {boneMeshes}
    </group>
  )
}
// const S0 = [POSE_3D_MANY[0][0]]
// const S1 = [POSE_3D_MANY[0][1]]
// const skeletons = POSE_3D_MANY.map((el) => <Human3DSkeleton jointRadius={0.005} boneRadius={0.0025} frameRate={24} skeleton={el} />)
// const skeletons = [<Human3DSkeleton jointRadius={0.05} boneRadius={0.025} frameRate={1} skeleton={S0} />,
// <Human3DSkeleton jointRadius={0.05} boneRadius={0.025} frameRate={1} skeleton={S1} />
// ]
// const skeletons = [
// <Human3DSkeleton jointRadius={0.005} boneRadius={0.0025} frameRate={24} skeleton={POSE_3D} />,
// ]
// The two single-frame skeletons from res.json, each wrapped in a one-frame
// animation. Elements rendered from an array need stable, unique keys.
const skeletons = [
  <Human3DSkeleton key="skeleton-a" jointRadius={0.005} boneRadius={0.0025} frameRate={24} skeleton={[POSE_3D_04_02.a]} />,
  <Human3DSkeleton key="skeleton-b" jointRadius={0.005} boneRadius={0.0025} frameRate={24} skeleton={[POSE_3D_04_02.b]} />,
]
// One frustum visualisation per calibrated camera.
const cameras = Object.entries(CAMERA_EXTRINSIC_MATRIX_MAP).map(([key, value]) => {
  const intrinsic = CAMERA_INTRINSIC_MATRIX_MAP[key]
  if (!intrinsic) {
    // Without intrinsics the field of view cannot be derived; skip this camera
    // rather than crash the whole scene.
    console.warn(`no intrinsic matrix for camera ${key}; skipping`)
    return null
  }
  const { fov_x, fov_y } = intrinsicToFov(intrinsic, { width: IMAGE_WIDTH, height: IMAGE_HEIGHT })
  // make the far plane inversely proportional to the fov
  const far = (1 / fov_x) * 20
  // THREE.PerspectiveCamera's `fov` is the VERTICAL field of view; the
  // horizontal extent follows from `aspect`. The label still shows the
  // horizontal FOV.
  return <CameraViewFromExtrinsic key={key} name={`${key}(${fov_x.toFixed(1)})`} extrinsic={preProcessExtrinsic(value)} fov={fov_y} aspect={IMAGE_WIDTH / IMAGE_HEIGHT} far={far} />
})
// Fiber exposes three.js classes as JSX elements under the camel-case version
// of their three.js name (e.g. <ambientLight />), so none of them need to be
// imported explicitly.
return (
  <>
    <CameraControls />
    <Stats />
    {/* Scene content: lights, world axes, camera frustums and skeletons */}
    <group>
      {/* <OrbitControls /> */}
      <ambientLight intensity={0.05} />
      <directionalLight castShadow position={[3.3, 6, 4.4]} intensity={5} />
      {/* <Floor /> */}
      <Axes />
      {cameras}
      {skeletons}
    </group>
  </>
)
}
// Root component: a full-viewport, shadow-enabled Canvas hosting the Scene.
const App = () => {
  const canvasStyle = { background: "#e9e9e9", width: "100vw", height: "100vh" }
  return (
    <Canvas shadows style={canvasStyle}>
      <Scene />
    </Canvas>
  )
}
export default App