Enhancing your dataset with synthetic data can be a powerful tactic for improving your model. Synthetic images are cheaper to generate than collecting and labeling images from the real world, and their annotations are guaranteed to be consistently labeled and pixel-perfect every time.
Unity Perception is one tool that can be used to generate training data from 3D models. However, its export format isn't natively supported by machine learning frameworks. Roboflow can convert these files for easy use and help you combine them with your existing dataset to create a composite dataset that is better than the sum of its parts.
Below, learn the structure of Unity Perception JSON.
{
"version": "0.0.1",
"captures": [{
"id": "b49f1317-f3ce-42e3-8201-b85b0bd1574b",
"sequence_id": "96bf8ade-5d77-49a4-82e2-dc1ef567ab82",
"step": 0,
"timestamp": 10.0,
"sensor": {
"sensor_id": "edf45916-afec-9881-8d21-922241c54310",
"ego_id": "1ceb788a-8155-4860-99da-2b406536f198",
"modality": "camera",
"translation": [
0.0,
0.0,
0.0
],
"rotation": [
0.0,
0.0,
0.0,
1.0
],
"camera_intrinsic": [
[
0.999999762,
0.0,
0.0
],
[
0.0,
1.77777731,
0.0
],
[
0.2,
0.266666651,
-1.0006001
]
]
},
"ego": {
"ego_id": "1ceb788a-8155-4860-99da-2b406536f198",
"translation": [
-595.11,
1175.75,
487.64
],
"rotation": [
0.0778284,
0.9212198,
-0.246840149,
0.290459484
],
"velocity": null,
"acceleration": null
},
"filename": "RGB2327d665-d71f-4c4f-8c77-66df8fa29202/rgb_2.png",
"format": "PNG",
"annotations": [{
"id": "a6aef966-1fb3-4d59-91df-a44866e81038",
"annotation_definition": "f9f22e05-443f-4602-a422-ebe4ea9b55cb",
"values": [{
"label_id": 1,
"label_name": "person",
"instance_id": 2,
"x": 875.0,
"y": 303.0,
"width": 151.0,
"height": 122.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 3,
"x": 1226.0,
"y": 382.0,
"width": 181.0,
"height": 130.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 1,
"x": 660.0,
"y": 402.0,
"width": 148.0,
"height": 112.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 5,
"x": 1003.0,
"y": 381.0,
"width": 148.0,
"height": 141.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 42,
"x": 831.0,
"y": 389.0,
"width": 230.0,
"height": 225.0
}
]
}]
},
{
"id": "87182978-7b34-4157-85f2-9fa0b57af085",
"sequence_id": "96bf8ade-5d77-49a4-82e2-dc1ef567ab82",
"step": 1,
"timestamp": 20.0,
"sensor": {
"sensor_id": "edf45916-afec-9881-8d21-922241c54310",
"ego_id": "1ceb788a-8155-4860-99da-2b406536f198",
"modality": "camera",
"translation": [
0.0,
0.0,
0.0
],
"rotation": [
0.0,
0.0,
0.0,
1.0
],
"camera_intrinsic": [
[
0.999999762,
0.0,
0.0
],
[
0.0,
1.77777731,
0.0
],
[
0.2,
0.266666651,
-1.0006001
]
]
},
"ego": {
"ego_id": "1ceb788a-8155-4860-99da-2b406536f198",
"translation": [
-595.11,
1175.75,
487.64
],
"rotation": [
0.0778284,
0.9212198,
-0.246840149,
0.290459484
],
"velocity": null,
"acceleration": null
},
"filename": "RGB2327d665-d71f-4c4f-8c77-66df8fa29202/rgb_3.png",
"format": "PNG",
"annotations": [{
"id": "94bffb82-7753-4d79-8294-66e2976d7362",
"annotation_definition": "f9f22e05-443f-4602-a422-ebe4ea9b55cb",
"values": [{
"label_id": 1,
"label_name": "person",
"instance_id": 2,
"x": 875.0,
"y": 303.0,
"width": 151.0,
"height": 122.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 3,
"x": 1226.0,
"y": 382.0,
"width": 181.0,
"height": 130.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 1,
"x": 660.0,
"y": 402.0,
"width": 148.0,
"height": 112.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 5,
"x": 1003.0,
"y": 381.0,
"width": 149.0,
"height": 142.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 42,
"x": 831.0,
"y": 389.0,
"width": 230.0,
"height": 225.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 4,
"x": 1317.0,
"y": 455.0,
"width": 201.0,
"height": 166.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 33,
"x": 454.0,
"y": 462.0,
"width": 213.0,
"height": 216.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 13,
"x": 899.0,
"y": 504.0,
"width": 308.0,
"height": 286.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 16,
"x": 222.0,
"y": 501.0,
"width": 189.0,
"height": 290.0
},
{
"label_id": 4,
"label_name": "hard-hat",
"instance_id": 38,
"x": 295.0,
"y": 685.0,
"width": 113.0,
"height": 173.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 10,
"x": 40.0,
"y": 630.0,
"width": 202.0,
"height": 252.0
},
{
"label_id": 12,
"label_name": "car",
"instance_id": 46,
"x": 827.0,
"y": 712.0,
"width": 257.0,
"height": 229.0
},
{
"label_id": 1,
"label_name": "person",
"instance_id": 26,
"x": 1402.0,
"y": 810.0,
"width": 254.0,
"height": 187.0
},
{
"label_id": 13,
"label_name": "bicycle",
"instance_id": 7,
"x": 667.0,
"y": 641.0,
"width": 298.0,
"height": 394.0
},
{
"label_id": 5,
"label_name": "door",
"instance_id": 31,
"x": 1143.0,
"y": 839.0,
"width": 652.0,
"height": 241.0
}
]
}]
}]
}
With Roboflow supervision, an open source Python package with utilities for completing computer vision tasks, you can merge and split detections stored in Unity Perception JSON. Read our dedicated guides to learn how to perform each operation.
Below, see model architectures that accept training data in the Unity Perception JSON format.
On each page below, you can find links to our guides that show how to plot predictions from the model, and complete other common tasks like detecting small objects with the model.