{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"Screen-Point-and-Read","owner":"eric-ai-lab","isFork":false,"description":"Code repo for \"Read Anywhere Pointed: Layout-aware GUI Screen Reading with Tree-of-Lens Grounding\"","allTopics":["screen-reader","ai-agents","grounding","gui-agents","tree-of-lens","layout-understanding"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":11,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-03T05:16:16.275Z"}},{"type":"Public","name":"MMWorld","owner":"eric-ai-lab","isFork":false,"description":"Official repo of the paper \"MMWorld: Towards Multi-discipline Multi-faceted World Model Evaluation in Videos\"","allTopics":["evaluation","video-understanding","video-dataset","multi-disciplinary","multimodal-large-language-models","world-model"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":14,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-02T06:29:00.144Z"}},{"type":"Public","name":"ProbMed","owner":"eric-ai-lab","isFork":false,"description":"\"Worse than Random? An Embarrassingly Simple Probing Evaluation of Large Multimodal Models in Medical VQA\"","allTopics":["evaluation","vision-and-language","medical-vqa","medical-diagnosis","llms","large-multimodal-models"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":10,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-24T03:27:43.341Z"}},{"type":"Public","name":"via-video","owner":"eric-ai-lab","isFork":false,"description":"","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":19,"forksCount":0,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-20T07:32:01.287Z"}},{"type":"Public","name":"R2H","owner":"eric-ai-lab","isFork":false,"description":"Official implementation of the EMNLP 2023 paper \"R2H: Building Multimodal Navigation Helpers that Respond to Help Requests\"","allTopics":["helper","navigation","dialogue","multimodal","embodied-agent","response-generation","ai-agent"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-19T22:38:35.083Z"}},{"type":"Public","name":"awesome-vision-language-navigation","owner":"eric-ai-lab","isFork":false,"description":"A curated list for vision-and-language navigation. 
ACL 2022 paper \"Vision-and-Language Navigation: A Survey of Tasks, Methods, and Future Directions\"","allTopics":["navigation","vision-and-language","embodied-agent","vision-and-language-navigation"],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":299,"forksCount":17,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-02T03:50:13.767Z"}},{"type":"Public","name":"Discffusion","owner":"eric-ai-lab","isFork":false,"description":"Official repo for the paper \"Discffusion: Discriminative Diffusion Models as Few-shot Vision and Language Learners\"","allTopics":["vision-and-language","few-shot-learning","discriminative-learning","diffusion-models"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":25,"forksCount":2,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-27T06:13:35.220Z"}},{"type":"Public","name":"MultipanelVQA","owner":"eric-ai-lab","isFork":false,"description":"Code for the MultipanelVQA benchmark \"Muffin or Chihuahua? Challenging Large Vision-Language Models with Multipanel VQA\"","allTopics":["vqa","vlm","mllm","screen-ai","multipanel-understanding"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":0,"starsCount":6,"forksCount":0,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,7,1,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-11T17:36:06.957Z"}},{"type":"Public","name":"Naivgation-as-wish","owner":"eric-ai-lab","isFork":false,"description":"Official implementation of the NAACL 2024 paper \"Navigation as Attackers Wish? 
Towards Building Robust Embodied Agents under Federated Learning\"","allTopics":["robustness","attack-defense","federated-learning","embodied-agent","vision-and-language-navigation"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-10T16:27:51.753Z"}},{"type":"Public","name":"ComCLIP","owner":"eric-ai-lab","isFork":false,"description":"Official implementation and dataset for the NAACL 2024 paper \"ComCLIP: Training-Free Compositional Image and Text Matching\"","allTopics":["causality","clip","svo","slip","vision-and-language","compositionality","flickr8k-dataset","image-text-matching","flickr30k","image-text-retrieval","winoground","blip2"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":27,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-10T05:17:24.784Z"}},{"type":"Public","name":"swap-anything","owner":"eric-ai-lab","isFork":false,"description":"\"SwapAnything: Enabling Arbitrary Object Swapping in Personalized Visual Editing\"","allTopics":["image-editing","personalization","diffusion-models","subject-driven-generation","photoswapping","swap-anything"],"primaryLanguage":null,"pullRequestCount":0,"issueCount":1,"starsCount":191,"forksCount":5,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-10T02:13:58.642Z"}},{"type":"Public","name":"llm_coordination","owner":"eric-ai-lab","isFork":false,"description":"Code repository for the paper \"LLM-Coordination: Evaluating and Analyzing Multi-agent Coordination Abilities in Large Language Models\"","allTopics":["multiagent","llms","coordination-game","agent-coordination"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":17,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-05T08:23:56.784Z"}},{"type":"Public","name":"minigpt-5.github.io","owner":"eric-ai-lab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-03T22:03:23.176Z"}},{"type":"Public","name":"MiniGPT-5","owner":"eric-ai-lab","isFork":false,"description":"Official implementation of paper \"MiniGPT-5: Interleaved Vision-and-Language Generation via Generative Vokens\"","allTopics":["transformers","diffusion-models","multimodal-generation","multimodal-llm"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":8,"starsCount":832,"forksCount":51,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-19T02:45:56.566Z"}},{"type":"Public","name":"photoswap","owner":"eric-ai-lab","isFork":false,"description":"Official implementation of the NeurIPS 2023 paper \"Photoswap: Personalized Subject Swapping in Images\"","allTopics":["image-editing","personalization","diffusion-models","generative-ai","photoswap"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":4,"starsCount":333,"forksCount":22,"license":"MIT 
License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-28T20:23:14.856Z"}},{"type":"Public","name":"Aerial-Vision-and-Dialog-Navigation","owner":"eric-ai-lab","isFork":false,"description":"Codebase of ACL 2023 Findings \"Aerial Vision-and-Dialog Navigation\"","allTopics":["navigation","aerial-imagery","drone-navigation","vision-and-language","vln"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":31,"forksCount":6,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-16T07:10:17.307Z"}},{"type":"Public","name":"PECTVLM","owner":"eric-ai-lab","isFork":false,"description":"Code implementation for Findings of EMNLP 2023 paper \"Parameter-Efficient Cross-lingual Transfer of Vision and Language Models via Translation-based Alignment\"","allTopics":[],"primaryLanguage":{"name":"Smalltalk","color":"#596706"},"pullRequestCount":0,"issueCount":0,"starsCount":6,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-17T17:56:14.598Z"}},{"type":"Public","name":"T2IAT","owner":"eric-ai-lab","isFork":false,"description":"T2IAT: Measuring Valence and Stereotypical Biases in Text-to-Image Generation","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-15T19:46:57.617Z"}},{"type":"Public","name":"PEViT","owner":"eric-ai-lab","isFork":false,"description":"Official implementation of AAAI 2023 paper \"Parameter-efficient Model Adaptation for Vision Transformers\"","allTopics":["pytorch","image-classification","fine-tuning","vision-transformer","parameter-efficient-tuning"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":8,"starsCount":94,"forksCount":4,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-07T18:36:03.907Z"}},{"type":"Public","name":"VLMbench","owner":"eric-ai-lab","isFork":false,"description":"NeurIPS 2022 Paper \"VLMbench: A Compositional Benchmark for Vision-and-Language Manipulation\"","allTopics":["language-grounding","vision-and-language","robotic-manipulation","compositionality","embodied-ai"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":4,"starsCount":75,"forksCount":8,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-03-05T04:59:17.970Z"}},{"type":"Public","name":"Mitigate-Gender-Bias-in-Image-Search","owner":"eric-ai-lab","isFork":false,"description":"Code for the EMNLP 2021 Oral paper \"Are Gender-Neutral Queries Really Gender-Neutral? 
Mitigating Gender Bias in Image Search\" https://arxiv.org/abs/2109.05433","allTopics":["image-search","multimodality","gender-bias","fairness-ml","vision-language"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":12,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-02-06T19:12:44.298Z"}},{"type":"Public","name":"CPL","owner":"eric-ai-lab","isFork":false,"description":"Official implementation of our EMNLP 2022 paper \"CPL: Counterfactual Prompt Learning for Vision and Language Models\"","allTopics":["vqa","image-classification","causal-inference","vision-and-language","image-text-retrieval","counterfactual-reasoning","prompt-tuning"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":6,"starsCount":31,"forksCount":4,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-12-05T05:45:19.174Z"}},{"type":"Public","name":"ACLToolBox","owner":"eric-ai-lab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":8,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-11-15T21:28:38.183Z"}},{"type":"Public","name":"FedVLN","owner":"eric-ai-lab","isFork":false,"description":"[ECCV 2022] Official pytorch implementation of the paper \"FedVLN: Privacy-preserving Federated Vision-and-Language Navigation\"","allTopics":["federated-learning","privacy-preserving-machine-learning","vision-and-language-navigation"],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":12,"forksCount":2,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-10-08T21:16:58.326Z"}}],"repositoryCount":24,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"eric-ai-lab repositories"}
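A listing like the one above can be regenerated from GitHub's public REST API, specifically the `GET /orgs/{org}/repos` endpoint. Below is a minimal sketch, assuming the third-party `requests` package; the helper name `list_org_repos` and the output format are illustrative choices, not part of the original page, and unauthenticated calls are subject to GitHub's rate limits.

```python
# Minimal sketch: rebuild an org repository listing via GitHub's REST API.
# Assumes the third-party `requests` package; the helper name and output
# format are illustrative, not taken from the original page.
import requests


def list_org_repos(org: str) -> list[dict]:
    """Fetch an organization's public repos, most recently pushed first."""
    resp = requests.get(
        f"https://api.github.com/orgs/{org}/repos",
        params={"type": "public", "per_page": 100},
        headers={"Accept": "application/vnd.github+json"},
        timeout=30,
    )
    resp.raise_for_status()  # fail loudly on rate limiting or a bad org name
    # `pushed_at` is an ISO 8601 string, so lexicographic sort is chronological.
    return sorted(resp.json(), key=lambda r: r["pushed_at"] or "", reverse=True)


if __name__ == "__main__":
    for repo in list_org_repos("eric-ai-lab"):
        lic = (repo.get("license") or {}).get("name") or "no license"
        print(
            f"- {repo['name']} ({repo.get('language') or 'n/a'}, {lic}; "
            f"{repo['stargazers_count']} stars, {repo['forks_count']} forks; "
            f"updated {(repo['pushed_at'] or '')[:10]}): "
            f"{repo.get('description') or 'No description.'}"
        )
```

For an organization with more than 100 repositories the endpoint paginates and a production version would follow the `Link` response header; for this 24-repository organization a single page suffices.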