first commit

donydchen · Mar 21, 2024 · 732f1ca · 732f1ca
commit 732f1ca
Show file tree

Hide file tree

Showing 132 changed files with 14,137 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,178 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+/datasets
+/dataset_cache
+
+# Outputs
+/outputs
+/lightning_logs
+/checkpoints
+
+.bashrc
+/launcher_venv
+/slurm_logs
+*.torch
+*.ckpt
+table.tex
+/baselines
+/test/*
+point_clouds*
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,36 @@
+{
+ // Automatically format using Black on save.
+ "editor.formatOnSave": true,
+ // Draw a ruler at Black's column width.
+ "editor.rulers": [88],
+ // Hide non-code files.
+ "files.exclude": {
+ "**/.git": true,
+ "**/.svn": true,
+ "**/.hg": true,
+ "**/CVS": true,
+ "**/.DS_Store": true,
+ "**/Thumbs.db": true,
+ "**/__pycache__": true,
+ "**/venv": true,
+ "**/lightning_logs": true,
+ "**/dataset_cache": true,
+ "**/.ruff_cache": true
+ // "**/datasets": true,
+ },
+ "[python]": {
+ "editor.defaultFormatter": "ms-python.black-formatter"
+ },
+ "debug.focusWindowOnBreak": false,
+ "files.watcherExclude": {
+ "**/.git/**": true,
+ "**/checkpoints/**": true,
+ "**/datasets/**": true,
+ "**/lightning_logs/**": true,
+ "**/outputs/**": true,
+ "**/dataset_cache/**": true,
+ "**/.ruff_cache/**": true,
+ "**/venv/**": true,
+ "**/datasets": true
+ }
+}
diff --git a/README.md b/README.md
@@ -0,0 +1,116 @@
+# MVSplat
+
+This is the official implementation of **MVSplat: Efficient 3D Gaussian Splatting from Sparse Multi-View Images** by Yuedong Chen, Haofei Xu, Chuanxia Zheng, Bohan Zhuang, Marc Pollefeys, Andreas Geiger, Tat-Jen Cham, and Jianfei Cai.
+
+### [Project Page](https://donydchen.github.io/mvsplat/) | [arXiv](https://arxiv.org/abs/2403.14627) | [Pretrained Models](https://drive.google.com/drive/folders/14_E_5R6ojOWnLSrSVLVEMHnTiKsfddjU)
+
+
+
+
+
+## Installation
+
+To get started, create a conda virtual environment using Python 3.10+ and install requirements:
+
+```bash
+conda create -n mvsplat python=3.10
+conda activate mvsplat
+pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118
+pip install -r requirements.txt
+```
+
+## Acquiring Datasets
+
+MVSplat utilises the same dataset settings as pixelSplat. Below we quote pixelSplat's [detailed instructions](https://github.com/dcharatan/pixelsplat?tab=readme-ov-file#acquiring-datasets) on getting datasets.
+
+> pixelSplat was trained using versions of the RealEstate10k and ACID datasets that were split into ~100 MB chunks for use on server cluster file systems. Small subsets of the Real Estate 10k and ACID datasets in this format can be found [here](https://drive.google.com/drive/folders/1joiezNCyQK2BvWMnfwHJpm2V77c7iYGe?usp=sharing). To use them, simply unzip them into a newly created `datasets` folder in the project root directory.
+
+> If you would like to convert downloaded versions of the Real Estate 10k and ACID datasets to our format, you can use the [scripts here](https://github.com/dcharatan/real_estate_10k_tools). Reach out to us (pixelSplat) if you want the full versions of our processed datasets, which are about 500 GB and 160 GB for Real Estate 10k and ACID respectively.
+
+## Run the Code
+
+### Evaluation
+
+To render frames and compute scores from an existing checkpoint,
+
+* get the [pretrained models](https://drive.google.com/drive/folders/14_E_5R6ojOWnLSrSVLVEMHnTiKsfddjU), and save them to a `checkpoints` folder in the project root directory
+
+* run the following:
+
+```bash
+# re10k
+python -m src.main +experiment=re10k \
+checkpointing.load=checkpoints/re10k.ckpt \
+mode=test \
+dataset/view_sampler=evaluation \
+test.compute_scores=true
+
+# acid
+python -m src.main +experiment=acid \
+checkpointing.load=checkpoints/acid.ckpt \
+mode=test \
+dataset/view_sampler=evaluation \
+dataset.view_sampler.index_path=assets/evaluation_index_acid.json \
+test.compute_scores=true
+```
+
+* the rendered novel views will be stored under `outputs/test`
+
+To render videos from a pretrained model, run the following:
+
+```bash
+# re10k
+python -m src.main +experiment=re10k \
+checkpointing.load=checkpoints/re10k.ckpt \
+mode=test \
+dataset/view_sampler=evaluation \
+dataset.view_sampler.index_path=assets/evaluation_index_re10k_video.json \
+test.save_video=true \
+test.save_image=false \
+test.compute_scores=false
+```
+
+### Training
+
+Run the following:
+
+```bash
+# download the pretrained backbone weights from UniMatch and save them to 'checkpoints/'
+wget 'https://s3.eu-central-1.amazonaws.com/avg-projects/unimatch/pretrained/gmdepth-scale1-resumeflowthings-scannet-5d9d7964.pth' -P checkpoints
+# train mvsplat
+python -m src.main +experiment=re10k data_loader.train.batch_size=14
+```
+
+The setting requires a single GPU with 80 GB of VRAM (A100). Set a smaller `data_loader.train.batch_size` to reduce memory usage.
+
+### Ablations
+
+We also provide a collection of our [ablation models](https://drive.google.com/drive/folders/14_E_5R6ojOWnLSrSVLVEMHnTiKsfddjU) (under the 'ablations' folder). To evaluate one of them, *e.g.*, the 'base' model, run the following command:
+
+```bash
+# Table 3: base
+python -m src.main +experiment=re10k \
+checkpointing.load=checkpoints/ablations/re10k_worefine.ckpt \
+mode=test \
+dataset/view_sampler=evaluation \
+test.compute_scores=true \
+wandb.name=abl/re10k_base \
+model.encoder.wo_depth_refine=true 
+```
+
+More running commands can be found at [more_commands.sh](more_commands.sh).
+
+## BibTeX
+
+```
+@article{chen2024mvsplat,
+ title={MVSplat: Efficient 3D Gaussian Splatting from Sparse Multi-View Images},
+ author={Chen, Yuedong and Xu, Haofei and Zheng, Chuanxia and Zhuang, Bohan and Pollefeys, Marc and Geiger, Andreas and Cham, Tat-Jen and Cai, Jianfei},
+ journal={arXiv},
+ year={2024},
+}
+```
+
+## Acknowledgements
+
+The project is largely based on [pixelSplat](https://github.com/dcharatan/pixelsplat) and has incorporated numerous code snippets from [UniMatch](https://github.com/autonomousvision/unimatch). Many thanks to these two projects for their excellent contributions!
diff --git a/assets/evaluation_index_acid.json b/assets/evaluation_index_acid.json
diff --git a/assets/evaluation_index_acid_video.json b/assets/evaluation_index_acid_video.json
diff --git a/assets/evaluation_index_re10k.json b/assets/evaluation_index_re10k.json
diff --git a/assets/evaluation_index_re10k_video.json b/assets/evaluation_index_re10k_video.json
diff --git a/config/compute_metrics.yaml b/config/compute_metrics.yaml
@@ -0,0 +1,25 @@
+defaults:
+ - dataset: re10k
+ - model/encoder: epipolar
+ - loss: []
+ - optional dataset/view_sampler_dataset_specific_config: ${dataset/view_sampler}_${dataset}
+ - override dataset/view_sampler: evaluation
+
+data_loader:
+ train:
+ num_workers: 0
+ persistent_workers: true
+ batch_size: 1
+ seed: 1234
+ test:
+ num_workers: 4
+ persistent_workers: false
+ batch_size: 1
+ seed: 2345
+ val:
+ num_workers: 0
+ persistent_workers: true
+ batch_size: 1
+ seed: 3456
+
+seed: 111123
diff --git a/config/dataset/re10k.yaml b/config/dataset/re10k.yaml
@@ -0,0 +1,22 @@
+defaults:
+ - view_sampler: bounded
+
+name: re10k
+roots: [datasets/re10k]
+make_baseline_1: true
+augment: true
+
+image_shape: [180, 320]
+background_color: [0.0, 0.0, 0.0]
+cameras_are_circular: false
+
+baseline_epsilon: 1e-3
+max_fov: 100.0
+
+skip_bad_shape: true
+near: -1.
+far: -1.
+baseline_scale_bounds: true
+shuffle_val: true
+test_len: -1
+test_chunk_interval: 1
diff --git a/config/dataset/view_sampler/all.yaml b/config/dataset/view_sampler/all.yaml
@@ -0,0 +1 @@
+name: all
diff --git a/config/dataset/view_sampler/arbitrary.yaml b/config/dataset/view_sampler/arbitrary.yaml
@@ -0,0 +1,7 @@
+name: arbitrary
+
+num_target_views: 1
+num_context_views: 2
+
+# If you want to hard-code context views, do so here.
+context_views: null