Work in Progress (Stay tuned)
Zero-shot Image Classification
Zero-shot Text-to-Image Generation
VQA / Image Captioning
Text-driven Image Generation
Storytelling
- https://github.com/openai/CLIP
- https://github.com/openai/DALL-E
- https://github.com/clip-vil/CLIP-ViL
- https://github.com/orpatashnik/StyleCLIP
- https://github.com/YehLi/xmodaler
- https://github.com/EdenBD/MultiModalStory-demo
% arXiv preprint, identified by the eprint / archivePrefix / primaryClass fields.
@misc{radford2021learning,
  author        = {Alec Radford and Jong Wook Kim and Chris Hallacy and Aditya Ramesh and Gabriel Goh and Sandhini Agarwal and Girish Sastry and Amanda Askell and Pamela Mishkin and Jack Clark and Gretchen Krueger and Ilya Sutskever},
  title         = {Learning Transferable Visual Models From Natural Language Supervision},
  year          = {2021},
  eprint        = {2103.00020},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% arXiv preprint, identified by the eprint / archivePrefix / primaryClass fields.
@misc{ramesh2021zeroshot,
  author        = {Aditya Ramesh and Mikhail Pavlov and Gabriel Goh and Scott Gray and Chelsea Voss and Alec Radford and Mark Chen and Ilya Sutskever},
  title         = {Zero-Shot Text-to-Image Generation},
  year          = {2021},
  eprint        = {2102.12092},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% arXiv preprint, identified by the eprint / archivePrefix / primaryClass fields.
% The acronym in the title is brace-protected so that bibliography styles which
% apply sentence casing do not lowercase it.
@misc{shen2021clip,
  author        = {Sheng Shen and Liunian Harold Li and Hao Tan and Mohit Bansal and Anna Rohrbach and Kai-Wei Chang and Zhewei Yao and Kurt Keutzer},
  title         = {How Much Can {CLIP} Benefit Vision-and-Language Tasks?},
  year          = {2021},
  eprint        = {2107.06383},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% arXiv preprint, identified by the eprint / archivePrefix / primaryClass fields.
% The mixed-case names in the title are brace-protected so that bibliography
% styles which apply sentence casing keep their internal capitals.
@misc{patashnik2021styleclip,
  author        = {Or Patashnik and Zongze Wu and Eli Shechtman and Daniel Cohen-Or and Dani Lischinski},
  title         = {{StyleCLIP}: Text-Driven Manipulation of {StyleGAN} Imagery},
  year          = {2021},
  eprint        = {2103.17249},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% arXiv preprint, identified by the eprint / archivePrefix / primaryClass fields.
@misc{li2021xmodaler,
  author        = {Yehao Li and Yingwei Pan and Jingwen Chen and Ting Yao and Tao Mei},
  title         = {X-modaler: A Versatile and High-performance Codebase for Cross-modal Analytics},
  year          = {2021},
  eprint        = {2108.08217},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}