# Example of a paper entry
@misc{qian2023communicative,
  author        = {Chen Qian and Xin Cong and Wei Liu and Cheng Yang and Weize Chen and Yusheng Su and Yufan Dang and Jiahao Li and Juyuan Xu and Dahai Li and Zhiyuan Liu and Maosong Sun},
  title         = {Communicative Agents for Software Development},
  year          = {2023},
  month         = {7},
  eprint        = {2307.07924},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SE},
  url           = {https://arxiv.org/abs/2307.07924},
  environments  = {collaboration, embodied},
  agents        = {prompting_and_in_context_learning, more_than_three_agents},
  evaluation    = {rule_based},
  other         = {n/a},
}
## Papers
### Surveys and Overview
@article{fogg2002persuasive,
  author       = {Fogg, Brian J},
  title        = {Persuasive technology: using computers to change what we think and do},
  journal      = {Ubiquity},
  volume       = {2002},
  number       = {December},
  pages        = {2},
  year         = {2002},
  month        = {12},
  publisher    = {ACM New York, NY, USA},
  url          = {https://dl.acm.org/doi/10.1145/764008.763957},
  environments = {n/a},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {n/a},
}

@article{mathur2024advancing,
  author = {Mathur, Leena and Liang, Paul Pu and Morency, Louis-Philippe},
  title = {Advancing Social Intelligence in AI Agents: Technical Challenges and Open Questions},
  journal = {arXiv preprint arXiv:2404.11023},
  year = {2024},
  month = {4},
  url = {https://arxiv.org/abs/2404.11023},
  environments = {collaboration, competition, mixed_objectives, text, embodied, virtual, robotics},
  agents = {two_agents, reinforcement_learning, agents_with_personas},
  evaluation = {human},
  other = {n/a},
}

@article{yang2024social,
  author = {Yang, Diyi and Ziems, Caleb and Held, William and Shaikh, Omar and Bernstein, Michael S and Mitchell, John},
  title = {Social Skill Training with Large Language Models},
  journal = {arXiv preprint arXiv:2404.04204},
  year = {2024},
  month = {4},
  url = {https://arxiv.org/abs/2404.04204},
  environments = {collaboration, competition, mixed_objectives, text},
  agents = {two_agents, reinforcement_learning, agents_with_personas},
  evaluation = {human, rule_based},
  other = {n/a},
}

@article{li2024social,
  author       = {Li, Minzhi and Shi, Weiyan and Ziems, Caleb and Yang, Diyi},
  title        = {Social Intelligence Data Infrastructure: Structuring the Present and Navigating the Future},
  journal      = {arXiv preprint arXiv:2403.14659},
  year         = {2024},
  month        = {2},
  url          = {https://arxiv.org/abs/2403.14659},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human},
  other        = {n/a},
}


### Environments

#### Text Environments

% Section marker, not a real citation: per its own title text, this entry exists so the
% subsection of the papers that follow can be determined automatically; real entries go below it.
@article{environments/language,
  title = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
  author = {specical entry},
}

@misc{chuang2024simulating,
  author        = {Yun-Shiuan Chuang and Agam Goyal and Nikunj Harlalka and Siddharth Suresh and Robert Hawkins and Sijia Yang and Dhavan Shah and Junjie Hu and Timothy T. Rogers},
  title         = {Simulating Opinion Dynamics with Networks of LLM-based Agents},
  year          = {2024},
  eprint        = {2311.09618},
  archivePrefix = {arXiv},
  primaryClass  = {physics.soc-ph},
  url           = {https://arxiv.org/abs/2311.09618},
  environments  = {text},
  agents        = {more_than_three_agents},
  evaluation    = {rule_based},
  other         = {n/a},
}

@article{Bard_2020,
  author       = {Bard, Nolan and Foerster, Jakob N. and Chandar, Sarath and Burch, Neil and Lanctot, Marc and Song, H. Francis and Parisotto, Emilio and Dumoulin, Vincent and Moitra, Subhodeep and Hughes, Edward and Dunning, Iain and Mourad, Shibl and Larochelle, Hugo and Bellemare, Marc G. and Bowling, Michael},
  title        = {The Hanabi challenge: A new frontier for AI research},
  journal      = {Artificial Intelligence},
  volume       = {280},
  pages        = {103216},
  year         = {2020},
  month        = {3},
  publisher    = {Elsevier BV},
  issn         = {0004-3702},
  doi          = {10.1016/j.artint.2019.103216},
  url          = {http://dx.doi.org/10.1016/j.artint.2019.103216},
  environments = {collaboration, text},
  agents       = {more_than_three_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{he-etal-2018-decoupling,
  title        = {Decoupling Strategy and Generation in Negotiation Dialogues},
  author       = {He, He  and
      Chen, Derek  and
      Balakrishnan, Anusha  and
      Liang, Percy},
  editor       = {Riloff, Ellen  and
      Chiang, David  and
      Hockenmaier, Julia  and
      Tsujii, Jun{'}ichi},
  booktitle    = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  year         = {2018},
  month        = {10},
  pages        = {2333--2343},
  publisher    = {Association for Computational Linguistics},
  address      = {Brussels, Belgium},
  doi          = {10.18653/v1/D18-1256},
  url          = {https://aclanthology.org/D18-1256},
  environments = {text, mixed_objectives},
  agents       = {finetuning, reinforcement_learning, two_agents, agents_with_memory},
  evaluation   = {human},
  other        = {n/a},
}

@inproceedings{lewis-etal-2017-deal,
  title        = {Deal or No Deal? End-to-End Learning of Negotiation Dialogues},
  author       = {Lewis, Mike  and
      Yarats, Denis  and
      Dauphin, Yann  and
      Parikh, Devi  and
      Batra, Dhruv},
  editor       = {Palmer, Martha  and
      Hwa, Rebecca  and
      Riedel, Sebastian},
  booktitle    = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  year         = {2017},
  month        = {9},
  pages        = {2443--2453},
  publisher    = {Association for Computational Linguistics},
  address      = {Copenhagen, Denmark},
  doi          = {10.18653/v1/D17-1259},
  url          = {https://aclanthology.org/D17-1259},
  environments = {text, mixed_objectives},
  agents       = {reinforcement_learning, two_agents, agents_with_memory},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@inproceedings{wang-etal-2019-persuasion,
  title        = {Persuasion for Good: Towards a Personalized Persuasive Dialogue System for Social Good},
  author       = {Wang, Xuewei  and
      Shi, Weiyan  and
      Kim, Richard  and
      Oh, Yoojung  and
      Yang, Sijia  and
      Zhang, Jingwen  and
      Yu, Zhou},
  editor       = {Korhonen, Anna  and
      Traum, David  and
      M{\`a}rquez, Llu{\'\i}s},
  booktitle    = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  year         = {2019},
  month        = {7},
  pages        = {5635--5649},
  publisher    = {Association for Computational Linguistics},
  address      = {Florence, Italy},
  doi          = {10.18653/v1/P19-1566},
  url          = {https://aclanthology.org/P19-1566},
  abstract     = {Developing intelligent persuasive conversational agents to change people{'}s opinions and actions for social good is the frontier in advancing the ethical development of automated dialogue systems. To do so, the first step is to understand the intricate organization of strategic disclosures and appeals employed in human persuasion conversations. We designed an online persuasion task where one participant was asked to persuade the other to donate to a specific charity. We collected a large dataset with 1,017 dialogues and annotated emerging persuasion strategies from a subset. Based on the annotation, we built a baseline classifier with context information and sentence-level features to predict the 10 persuasion strategies used in the corpus. Furthermore, to develop an understanding of personalized persuasion processes, we analyzed the relationships between individuals{'} demographic and psychological backgrounds including personality, morality, value systems, and their willingness for donation. Then, we analyzed which types of persuasion strategies led to a greater amount of donation depending on the individuals{'} personal backgrounds. This work lays the ground for developing a personalized persuasive dialogue system.},
  environments = {text, mixed_objectives},
  agents       = {two_agents, finetuning},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@article{majumder2023tell,
  author       = {Majumder, Bodhisattwa Prasad and Hazra, Sanchaita},
  title        = {To Tell The Truth: Language of Deception and Language Models},
  journal      = {arXiv preprint arXiv:2311.07092},
  year         = {2024},
  month        = {4},
  url          = {https://arxiv.org/abs/2311.07092},
  environments = {text, mixed_objectives},
  agents       = {more_than_three_agents},
  evaluation   = {rule_based},
  other        = {more_information_asymmetrical},
}


@inproceedings{peskov-etal-2020-takes,
  title        = {It Takes Two to Lie: One to Lie, and One to Listen},
  author       = {Peskov, Denis  and
      Cheng, Benny  and
      Elgohary, Ahmed  and
      Barrow, Joe  and
      Danescu-Niculescu-Mizil, Cristian  and
      Boyd-Graber, Jordan},
  editor       = {Jurafsky, Dan  and
      Chai, Joyce  and
      Schluter, Natalie  and
      Tetreault, Joel},
  booktitle    = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  year         = {2020},
  month        = {7},
  pages        = {3811--3854},
  publisher    = {Association for Computational Linguistics},
  address      = {Online},
  doi          = {10.18653/v1/2020.acl-main.353},
  url          = {https://aclanthology.org/2020.acl-main.353},
  abstract     = {Trust is implicit in many online text conversations{---}striking up new friendships, or asking for tech support. But trust can be betrayed through deception. We study the language and dynamics of deception in the negotiation-based game Diplomacy, where seven players compete for world domination by forging and breaking alliances with each other. Our study with players from the Diplomacy community gathers 17,289 messages annotated by the sender for their intended truthfulness and by the receiver for their perceived truthfulness. Unlike existing datasets, this captures deception in long-lasting relationships, where the interlocutors strategically combine truth with lies to advance objectives. A model that uses power dynamics and conversational contexts can predict when a lie occurs nearly as well as human players.},
  environments = {text, mixed_objectives},
  agents       = {more_than_three_agents},
  evaluation   = {model_based},
  other        = {human_agent},
}

@article{LanctotEtAl2019OpenSpiel,
  title         = {{OpenSpiel}: A Framework for Reinforcement Learning in Games},
  author        = {Marc Lanctot and Edward Lockhart and Jean-Baptiste Lespiau and
               Vinicius Zambaldi and Satyaki Upadhyay and Julien P{\'e}rolat and
               Sriram Srinivasan and Finbarr Timbers and Karl Tuyls and
               Shayegan Omidshafiei and Daniel Hennes and Dustin Morrill and
               Paul Muller and Timo Ewalds and Ryan Faulkner and J{\'a}nos Kram{\'a}r
               and Bart De Vylder and Brennan Saeta and James Bradbury and David Ding
               and Sebastian Borgeaud and Matthew Lai and Julian Schrittwieser and
               Thomas Anthony and Edward Hughes and Ivo Danihelka and Jonah Ryan-Davis},
  journal       = {CoRR},
  volume        = {abs/1908.09453},
  year          = {2019},
  month         = {8},
  eprint        = {1908.09453},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {http://arxiv.org/abs/1908.09453},
  environments  = {collaboration, competition, mixed_objectives, text},
  agents        = {two_agents, more_than_three_agents, reinforcement_learning},
  evaluation    = {rule_based},
  other         = {n/a},
}

@article{zha2019rlcard,
  author       = {Zha, Daochen and Lai, Kwei-Herng and Cao, Yuanpu and Huang, Songyi and Wei, Ruzhe and Guo, Junyu and Hu, Xia},
  title        = {RLCard: A Toolkit for Reinforcement Learning in Card Games},
  journal      = {arXiv preprint arXiv:1910.04376},
  year         = {2019},
  month        = {7},
  url          = {https://github.com/datamllab/rlcard},
  environments = {collaboration, competition, mixed_objectives, text},
  agents       = {two_agents, more_than_three_agents, reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{meta2022human,
  author       = {{Meta Fundamental AI Research Diplomacy Team (FAIR)} and Bakhtin, Anton and Brown, Noam and Dinan, Emily and Farina, Gabriele and Flaherty, Colin and Fried, Daniel and Goff, Andrew and Gray, Jonathan and Hu, Hengyuan and others},
  title        = {Human-level play in the game of Diplomacy by combining language models with strategic reasoning},
  journal      = {Science},
  volume       = {378},
  number       = {6624},
  pages        = {1067--1074},
  year         = {2022},
  month        = {11},
  publisher    = {American Association for the Advancement of Science},
  url          = {https://www.science.org/doi/full/10.1126/science.ade9097},
  environments = {competition, text},
  agents       = {more_than_three_agents, reinforcement_learning, finetuning},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@software{multigrid,
  title        = {Fast Multi-Agent Gridworld Environments for Gymnasium},
  author       = {Oguntola, Ini},
  journal      = {GitHub},
  year         = {2023},
  month        = {3},
  url          = {https://github.com/ini/multigrid},
  environments = {collaboration, competition, text},
  agents       = {two_agents, more_than_three_agents, reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{callison-burch-etal-2022-dungeons,
  title        = {Dungeons and Dragons as a Dialog Challenge for Artificial Intelligence},
  author       = {Callison-Burch, Chris  and
      Tomar, Gaurav Singh  and
      Martin, Lara  and
      Ippolito, Daphne  and
      Bailis, Suma  and
      Reitter, David},
  editor       = {Goldberg, Yoav  and
      Kozareva, Zornitsa  and
      Zhang, Yue},
  booktitle    = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  year         = {2022},
  month        = {12},
  pages        = {9379--9393},
  publisher    = {Association for Computational Linguistics},
  address      = {Abu Dhabi, United Arab Emirates},
  doi          = {10.18653/v1/2022.emnlp-main.637},
  url          = {https://aclanthology.org/2022.emnlp-main.637},
  abstract     = {AI researchers have posited Dungeons and Dragons (D{\&}D) as a challenge problem to test systems on various language-related capabilities. In this paper, we frame D{\&}D specifically as a dialogue system challenge, where the tasks are to both generate the next conversational turn in the game and predict the state of the game given the dialogue history. We create a gameplay dataset consisting of nearly 900 games, with a total of 7,000 players, 800,000 dialogue turns, 500,000 dice rolls, and 58 million words. We automatically annotate the data with partial state information about the game play. We train a large language model (LM) to generate the next game turn, conditioning it on different information. The LM can respond as a particular character or as the player who runs the game{---}i.e., the Dungeon Master (DM). It is trained to produce dialogue that is either in-character (roleplaying in the fictional world) or out-of-character (discussing rules or strategy). We perform a human evaluation to determine what factors make the generated output plausible and interesting. We further perform an automatic evaluation to determine how well the model can predict the game state given the history and examine how well tracking the game state improves its ability to produce plausible conversational output.},
  environments = {text, implicit_objectives},
  agents       = {more_than_three_agents, pretraining, finetuning},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@inproceedings{zhou-etal-2023-cast,
  title        = {{I} Cast Detect Thoughts: Learning to Converse and Guide with Intents and Theory-of-Mind in Dungeons and Dragons},
  author       = {Zhou, Pei  and
      Zhu, Andrew  and
      Hu, Jennifer  and
      Pujara, Jay  and
      Ren, Xiang  and
      Callison-Burch, Chris  and
      Choi, Yejin  and
      Ammanabrolu, Prithviraj},
  editor       = {Rogers, Anna  and
      Boyd-Graber, Jordan  and
      Okazaki, Naoaki},
  booktitle    = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year         = {2023},
  month        = {7},
  pages        = {11136--11155},
  publisher    = {Association for Computational Linguistics},
  address      = {Toronto, Canada},
  doi          = {10.18653/v1/2023.acl-long.624},
  url          = {https://aclanthology.org/2023.acl-long.624},
  abstract     = {We propose a novel task, G4C, to study teacher-student natural language interactions in a goal-driven and grounded environment. Dungeons and Dragons (D{\&}D), a role-playing game, provides an ideal setting to investigate such interactions. Here, the Dungeon Master (DM), i.e., the teacher, guides the actions of several players{---}students, each with their own personas and abilities{---}to achieve shared goals grounded in a fantasy world. Our approach is to decompose and model these interactions into (1) the DM{'}s intent to guide players toward a given goal; (2) the DM{'}s guidance utterance to the players expressing this intent; and (3) a theory-of-mind (ToM) model that anticipates the players{'} reaction to the guidance one turn into the future. We develop a novel reinforcement learning (RL) method for training a DM that generates guidance for players by rewarding utterances where the intent matches the ToM-anticipated player actions. Human and automated evaluations show that a DM trained to explicitly model intents and incorporate ToM of the players using RL generates better-quality guidance that is 3x more likely to fulfill the DM{'}s intent than a vanilla natural language generation (NLG) approach.},
  environments = {text, implicit_objectives},
  agents       = {more_than_three_agents, reinforcement_learning},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@inproceedings{zhu-etal-2023-fireball,
  title        = {{FIREBALL}: A Dataset of Dungeons and Dragons Actual-Play with Structured Game State Information},
  author       = {Zhu, Andrew  and
      Aggarwal, Karmanya  and
      Feng, Alexander  and
      Martin, Lara  and
      Callison-Burch, Chris},
  editor       = {Rogers, Anna  and
      Boyd-Graber, Jordan  and
      Okazaki, Naoaki},
  booktitle    = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year         = {2023},
  month        = {7},
  pages        = {4171--4193},
  publisher    = {Association for Computational Linguistics},
  address      = {Toronto, Canada},
  doi          = {10.18653/v1/2023.acl-long.229},
  url          = {https://aclanthology.org/2023.acl-long.229},
  abstract     = {Dungeons {\&} Dragons (D{\&}D) is a tabletop roleplaying game with complex natural language interactions between players and hidden state information. Recent work has shown that large language models (LLMs) that have access to state information can generate higher quality game turns than LLMs that use dialog history alone. However, previous work used game state information that was heuristically created and was not a true gold standard game state. We present FIREBALL, a large dataset containing nearly 25,000 unique sessions from real D{\&}D gameplay on Discord with true game state info. We recorded game play sessions of players who used the Avrae bot, which was developed to aid people in playing D{\&}D online, capturing language, game commands and underlying game state information. We demonstrate that FIREBALL can improve natural language generation (NLG) by using Avrae state information, improving both automated metrics and human judgments of quality. Additionally, we show that LLMs can generate executable Avrae commands, particularly after finetuning.},
  environments = {text, implicit_objectives},
  agents       = {more_than_three_agents, finetuning},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@inproceedings{zhu2023calypso,
  author       = {Zhu, Andrew and Martin, Lara J. and Head, Andrew and Callison-Burch, Chris},
  title        = {{CALYPSO}: {LLMs} as Dungeon Masters' Assistants},
  booktitle    = {The 19th AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment (AIIDE 2023)},
  year         = {2023},
  month        = {8},
  url          = {https://arxiv.org/abs/2308.07540},
  environments = {text, implicit_objectives},
  agents       = {more_than_three_agents, finetuning},
  evaluation   = {human},
  other        = {human_agent},
}

@article{eliza1966weizenbaum,
  author       = {Weizenbaum, Joseph},
  title        = {ELIZA—a computer program for the study of natural language communication between man and machine},
  journal      = {Commun. ACM},
  pages        = {36--45},
  year         = {1966},
  month        = {1},
  issue_date   = {Jan. 1966},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  doi          = {10.1145/365153.365168},
  url          = {https://doi.org/10.1145/365153.365168},
  environments = {text, mixed_objectives},
  agents       = {agents_with_personas},
  evaluation   = {human},
  other        = {n/a},
}

@article{shuster2022blenderbot,
  author       = {Shuster, Kurt and Xu, Jing and Komeili, Mojtaba and Ju, Da and Smith, Eric Michael and Roller, Stephen and Ung, Megan and Chen, Moya and Arora, Kushal and Lane, Joshua and others},
  title        = {Blenderbot 3: a deployed conversational agent that continually learns to responsibly engage},
  journal      = {arXiv preprint arXiv:2208.03188},
  year         = {2022},
  month        = {8},
  url          = {https://arxiv.org/abs/2208.03188},
  environments = {text, mixed_objectives},
  agents       = {finetuning},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@misc{introducing2022,
  author       = {OpenAI},
  title        = {Introducing ChatGPT},
  journal      = {n/a},
  year         = {2022},
  month        = {11},
  url          = {https://openai.com/blog/chatgpt},
  environments = {text, mixed_objectives},
  agents       = {prompting_and_in_context_learning, agents_with_memory},
  evaluation   = {qualitative, human},
  other        = {human_agent},
}

@article{chiang2024chatbot,
  author       = {Chiang, Wei-Lin and Zheng, Lianmin and Sheng, Ying and Angelopoulos, Anastasios Nikolas and Li, Tianle and Li, Dacheng and Zhang, Hao and Zhu, Banghua and Jordan, Michael and Gonzalez, Joseph E and others},
  title        = {Chatbot Arena: An Open Platform for Evaluating LLMs by Human Preference},
  journal      = {arXiv preprint arXiv:2403.04132},
  year         = {2024},
  month        = {3},
  url          = {https://arxiv.org/abs/2403.04132},
  environments = {text, mixed_objectives},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {qualitative, human},
  other        = {human_agent},
}

@article{zhang2022opt,
  author       = {Zhang, Susan and Roller, Stephen and Goyal, Naman and Artetxe, Mikel and Chen, Moya and Chen, Shuohui and Dewan, Christopher and Diab, Mona and Li, Xian and Lin, Xi Victoria and others},
  title        = {Opt: Open pre-trained transformer language models},
  journal      = {arXiv preprint arXiv:2205.01068},
  year         = {2022},
  month        = {5},
  url          = {https://arxiv.org/abs/2205.01068},
  environments = {text, mixed_objectives},
  agents       = {finetuning, agents_with_personas},
  evaluation   = {qualitative, human},
  other        = {human_agent},
}

@article{zhou2020design,
  title        = {The Design and Implementation of {X}iao{I}ce, an Empathetic Social Chatbot},
  author       = {Zhou, Li  and
      Gao, Jianfeng  and
      Li, Di  and
      Shum, Heung-Yeung},
  journal      = {Computational Linguistics},
  volume       = {46},
  number       = {1},
  pages        = {53--93},
  year         = {2020},
  month        = {3},
  publisher    = {MIT Press},
  address      = {Cambridge, MA},
  doi          = {10.1162/coli_a_00368},
  url          = {https://aclanthology.org/2020.cl-1.2},
  environments = {text, mixed_objectives},
  agents       = {finetuning, agents_with_personas},
  evaluation   = {qualitative, human},
  other        = {human_agent},
}

@incollection{cai2006empathic,
  author       = {Cai, Yang},
  title        = {Empathic computing},
  booktitle    = {Ambient intelligence in everyday life: Foreword by Emile Aarts},
  pages        = {67--85},
  year         = {2006},
  month        = {1},
  publisher    = {Springer},
  url          = {https://link.springer.com/chapter/10.1007/11825890_3},
  environments = {text, mixed_objectives},
  agents       = {agents_with_personas},
  evaluation   = {human},
  other        = {n/a},
}

@inproceedings{dinan2018wizard,
  author       = {Emily Dinan and Stephen Roller and Kurt Shuster and Angela Fan and Michael Auli and Jason Weston},
  title        = {Wizard of Wikipedia: Knowledge-Powered Conversational Agents},
  booktitle    = {International Conference on Learning Representations},
  year         = {2019},
  month        = {4},
  url          = {https://openreview.net/forum?id=r1l73iRqKm},
  environments = {text, mixed_objectives, implicit_objectives},
  agents       = {finetuning, agents_with_personas},
  evaluation   = {qualitative, human},
  other        = {human_agent},
}

@inproceedings{ghazvininejad2018knowledge,
  author       = {Ghazvininejad, Marjan and Brockett, Chris and Chang, Ming-Wei and Dolan, Bill and Gao, Jianfeng and Yih, Wen-tau and Galley, Michel},
  title        = {A knowledge-grounded neural conversation model},
  booktitle    = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume       = {32},
  number       = {1},
  year         = {2018},
  month        = {4},
  url          = {https://ojs.aaai.org/index.php/AAAI/article/view/11977},
  environments = {text, mixed_objectives, implicit_objectives},
  agents       = {finetuning},
  evaluation   = {qualitative, human},
  other        = {human_agent},
}

@article{li2016persona,
  author       = {Li, Jiwei and Galley, Michel and Brockett, Chris and Spithourakis, Georgios P and Gao, Jianfeng and Dolan, Bill},
  title        = {A persona-based neural conversation model},
  journal      = {arXiv preprint arXiv:1603.06155},
  year         = {2016},
  month        = {8},
  url          = {https://aclanthology.org/P16-1094/},
  environments = {text, mixed_objectives},
  agents       = {finetuning, agents_with_personas},
  evaluation   = {qualitative, human},
  other        = {human_agent},
}

@book{wallace2009anatomy,
  author       = {Wallace, Richard S},
  title        = {The anatomy of ALICE},
  journal      = {n/a},
  year         = {2009},
  month        = {11},
  publisher    = {Springer},
  url          = {https://link.springer.com/chapter/10.1007/978-1-4020-6710-5_13},
  environments = {text, mixed_objectives},
  agents       = {agents_with_personas},
  evaluation   = {human},
  other        = {n/a},
}

@inproceedings{fung2018towards,
  author       = {Fung, Pascale and Bertero, Dario and Wan, Yan and Dey, Anik and Chan, Ricky Ho Yin and Bin Siddique, Farhad and Yang, Yang and Wu, Chien-Sheng and Lin, Ruixi},
  title        = {Towards empathetic human-robot interactions},
  booktitle    = {Computational Linguistics and Intelligent Text Processing: 17th International Conference, CICLing 2016, Konya, Turkey, April 3--9, 2016, Revised Selected Papers, Part II 17},
  pages        = {173--193},
  year         = {2018},
  month        = {3},
  organization = {Springer},
  url          = {https://link.springer.com/chapter/10.1007/978-3-319-75487-1_14},
  environments = {text, mixed_objectives},
  agents       = {agents_with_personas},
  evaluation   = {qualitative, human},
  other        = {human_agent},
}

@article{clavel2022socio,
  author       = {Clavel, Chlo{\'e} and Labeau, Matthieu and Cassell, Justine},
  title        = {Socio-conversational systems: Three challenges at the crossroads of fields},
  journal      = {Frontiers in Robotics and AI},
  volume       = {9},
  pages        = {937825},
  year         = {2022},
  month        = {1},
  publisher    = {Frontiers},
  url          = {https://pubmed.ncbi.nlm.nih.gov/36591412/},
  environments = {collaboration},
  agents       = {agents_with_personas, two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@article{rashkin2018towards,
  author       = {Rashkin, Hannah and Smith, Eric Michael and Li, Margaret and Boureau, Y-Lan},
  title        = {Towards empathetic open-domain conversation models: A new benchmark and dataset},
  journal      = {arXiv preprint arXiv:1811.00207},
  year         = {2018},
  month        = {11},
  url          = {https://arxiv.org/abs/1811.00207},
  environments = {collaboration},
  agents       = {agents_with_personas, two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@book{lugrin2022handbook,
  author       = {Lugrin, Birgit and Pelachaud, Catherine and Traum, David},
  title        = {The Handbook on Socially Interactive Agents: 20 Years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 2: Interactivity, Platforms, Application},
  journal      = {ACM},
  year         = {2022},
  month        = {1},
  url          = {https://dl.acm.org/doi/book/10.1145/3477322},
  environments = {collaboration},
  agents       = {agents_with_personas, two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@article{smith2022human,
  author       = {Smith, Eric Michael and Hsu, Orion and Qian, Rebecca and Roller, Stephen and Boureau, Y-Lan and Weston, Jason},
  title        = {Human evaluation of conversations is an open problem: comparing the sensitivity of various methods for evaluating dialogue agents},
  journal      = {arXiv preprint arXiv:2201.04723},
  year         = {2022},
  month        = {1},
  url          = {https://arxiv.org/abs/2201.04723},
  environments = {collaboration},
  agents       = {agents_with_personas, two_agents},
  evaluation   = {human},
  other        = {n/a},
}

@article{mehri2022report,
  author       = {Mehri, Shikib and Choi, Jinho and D'Haro, Luis Fernando and Deriu, Jan and Eskenazi, Maxine and Gasic, Milica and Georgila, Kallirroi and Hakkani-Tur, Dilek and Li, Zekang and Rieser, Verena and others},
  title        = {Report from the nsf future directions workshop on automatic evaluation of dialog: Research directions and challenges},
  journal      = {arXiv preprint arXiv:2203.10012},
  year         = {2022},
  month        = {3},
  url          = {https://arxiv.org/abs/2203.10012},
  environments = {collaboration},
  agents       = {agents_with_personas, two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}


@article{Shinn2023-tt,
  author        = {Shinn, Noah and Cassano, Federico and Labash, Beck and Gopinath, Ashwin and Narasimhan, Karthik and Yao, Shunyu},
  title         = {Reflexion: Language Agents with Verbal Reinforcement Learning},
  year          = {2023},
  month         = {3},
  eprint        = {2303.11366},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  arxivid       = {2303.11366},
  url           = {http://arxiv.org/abs/2303.11366},
  environments  = {mixed_objectives, text},
  agents        = {prompting_and_in_context_learning, reinforcement_learning, agents_with_memory},
  evaluation    = {rule_based},
  other         = {more_omniscient},
}

@inproceedings{toriumi2017ai,
  author       = {Toriumi, Fujio and Osawa, Hirotaka and Inaba, Michimasa and Katagami, Daisuke and Shinoda, Kosuke and Matsubara, Hitoshi},
  title        = {AI wolf contest—development of game AI using collective intelligence—},
  booktitle    = {Computer Games: 5th Workshop on Computer Games, CGW 2016, and 5th Workshop on General Intelligence in Game-Playing Agents, GIGA 2016, Held in Conjunction with the 25th International Conference on Artificial Intelligence, IJCAI 2016, New York, USA, July 9-10, 2016, Revised Selected Papers 5},
  pages        = {101--115},
  year         = {2017},
  month        = {4},
  organization = {Springer},
  url          = {https://link.springer.com/chapter/10.1007/978-3-319-57969-6_8},
  environments = {text},
  agents       = {reinforcement_learning, agents_with_memory},
  evaluation   = {rule_based},
  other        = {human_agent},
}


#### Embodied Environments
% Section marker, not a real citation: per its own title text, this entry exists so the
% subsection of the papers that follow can be determined automatically; real entries go below it.
@article{environments/embodied,
  title = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
  author = {specical entry},
}

@inproceedings{10.1145/3406499.3418760,
  author       = {Tsoi, Nathan and Hussein, Mohamed and Espinoza, Jeacy and Ruiz, Xavier and V{\'a}zquez, Marynel},
  title        = {SEAN: Social Environment for Autonomous Navigation},
  booktitle    = {Proceedings of the 8th International Conference on Human-Agent Interaction},
  series       = {HAI '20},
  location     = {Virtual Event, USA},
  pages        = {281--283},
  numpages     = {3},
  year         = {2020},
  month        = {9},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  isbn         = {9781450380546},
  doi          = {10.1145/3406499.3418760},
  url          = {https://doi.org/10.1145/3406499.3418760},
  keywords     = {social robot navigation, human-robot interaction},
  abstract     = {Social navigation research is performed on a variety of robotic platforms, scenarios, and environments. Making comparisons between navigation algorithms is challenging because of the effort involved in building these systems and the diversity of platforms used by the community; nonetheless, evaluation is critical to understanding progress in the field. In a step towards reproducible evaluation of social navigation algorithms, we propose the Social Environment for Autonomous Navigation (SEAN). SEAN is a high visual fidelity, open source, and extensible social navigation simulation platform which includes a toolkit for evaluation of navigation algorithms. We demonstrate SEAN and its evaluation toolkit in two environments with dynamic pedestrians and using two different robots.},
  environments = {mixed_objectives, embodied},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {human_agent, simulated_humans},
}

@inproceedings{puig2024habitat,
  title        = {Habitat 3.0: A Co-Habitat for Humans, Avatars, and Robots},
  author       = {Xavier Puig and Eric Undersander and Andrew Szot and Mikael Dallaire Cote and Tsung-Yen Yang and Ruslan Partsey and Ruta Desai and Alexander Clegg and Michal Hlavac and So Yeon Min and Vladim{\'\i}r Vondru{\v{s}} and Theophile Gervet and Vincent-Pierre Berges and John M Turner and Oleksandr Maksymets and Zsolt Kira and Mrinal Kalakrishnan and Jitendra Malik and Devendra Singh Chaplot and Unnat Jain and Dhruv Batra and Akshara Rai and Roozbeh Mottaghi},
  booktitle    = {The Twelfth International Conference on Learning Representations},
  year         = {2023},
  month        = {10},
  url          = {https://openreview.net/forum?id=4znwzG92CE},
  environments = {mixed_objectives, embodied},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {human_agent, simulated_humans},
}

@article{team2024scaling,
  title        = {Scaling Instructable Agents Across Many Simulated Worlds},
  author       = {{SIMA Team} and Abi Raad, Maria and Ahuja, Arun and Barros, Catarina and Besse, Frederic and Bolt, Andrew and Bolton, Adrian and Brownfield, Bethanie and Buttimore, Gavin and Cant, Max and others},
  year         = {2024},
  month        = {4},
  url          = {https://arxiv.org/abs/2404.10179v2},
  journal      = {arXiv preprint arXiv:2404.10179},
  environments = {embodied},
  agents       = {prompting_and_in_context_learning, finetuning},
  evaluation   = {qualitative},
  other        = {human_agent},
}

@article{ma2023large,
  title        = {Large language models play starcraft ii: Benchmarks and a chain of summarization approach},
  author       = {Ma, Weiyu and Mi, Qirui and Yan, Xue and Wu, Yuqiao and Lin, Runji and Zhang, Haifeng and Wang, Jun},
  journal      = {arXiv preprint arXiv:2312.11865},
  year         = {2023},
  month        = {12},
  url          = {https://arxiv.org/abs/2312.11865},
  environments = {embodied},
  agents       = {prompting_and_in_context_learning, finetuning},
  evaluation   = {qualitative},
  other        = {human_agent},
}

@misc{opengenerativeai2024evaluate,
  title        = {Evaluate LLMs in real time with Street Fighter III},
  author       = {{OpenGenerativeAI team}},
  year         = {2024},
  month        = {3},
  url          = {https://github.com/OpenGenerativeAI/llm-colosseum},
  environments = {embodied},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {qualitative},
  other        = {human_agent},
}

@misc{zhao2023competeai,
  title         = {CompeteAI: Understanding the Competition Behaviors in Large Language Model-based Agents},
  author        = {Qinlin Zhao and Jindong Wang and Yixuan Zhang and Yiqiao Jin and Kaijie Zhu and Hao Chen and Xing Xie},
  year          = {2023},
  month         = {10},
  url           = {https://arxiv.org/abs/2310.17512},
  environments  = {competition, text},
  agents        = {prompting_and_in_context_learning, two_agents},
  evaluation    = {rule_based},
  other         = {n/a},
  eprint        = {2310.17512},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
}

@article{liang2023foundations,
  title        = {Foundations \& Trends in Multimodal Machine Learning: Principles, Challenges, and Open Questions},
  author       = {Liang, Paul Pu and Zadeh, Amir and Morency, Louis-Philippe},
  journal      = {ACM Computing Surveys},
  publisher    = {ACM New York, NY},
  year         = {2023},
  month        = {1},
  url          = {https://dl.acm.org/doi/abs/10.1145/3656580},
  environments = {text, virtual, embodied, robotics, collaboration},
  agents       = {prompting_and_in_context_learning, finetuning, pretraining, reinforcement_learning, two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}


#### Virtual Environments
@article{environments/virtual,
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
  author = {specical entry},
}

@inproceedings{li2018appinite,
  title        = {Appinite: A multi-modal interface for specifying data descriptions in programming by demonstration using natural language instructions},
  author       = {Li, Toby Jia-Jun and Labutov, Igor and Li, Xiaohan Nancy and Zhang, Xiaoyi and Shi, Wenze and Ding, Wanling and Mitchell, Tom M and Myers, Brad A},
  booktitle    = {2018 IEEE Symposium on Visual Languages and Human-Centric Computing (VL/HCC)},
  pages        = {105--114},
  year         = {2018},
  month        = {3},
  organization = {IEEE},
  url          = {https://ieeexplore.ieee.org/document/8506506},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {human, qualitative},
  other        = {human_agent},
}

@inproceedings{li2019pumice,
  title        = {Pumice: A multi-modal agent that learns concepts and conditionals from natural language and demonstrations},
  author       = {Li, Toby Jia-Jun and Radensky, Marissa and Jia, Justin and Singarajah, Kirielle and Mitchell, Tom M and Myers, Brad A},
  booktitle    = {Proceedings of the 32nd annual ACM symposium on user interface software and technology},
  pages        = {577--589},
  year         = {2019},
  month        = {3},
  url          = {https://dl.acm.org/doi/10.1145/3332165.3347899},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {human, qualitative},
  other        = {human_agent},
}

@inproceedings{li2020interactive,
  title        = {Interactive task learning from GUI-grounded natural language instructions and demonstrations},
  author       = {Li, Toby Jia-Jun and Mitchell, Tom and Myers, Brad},
  booktitle    = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations},
  pages        = {215--223},
  year         = {2020},
  month        = {9},
  url          = {https://arxiv.org/abs/1909.00031},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {human, qualitative},
  other        = {human_agent},
}

@article{yang2023appagent,
  title        = {Appagent: Multimodal agents as smartphone users},
  author       = {Yang, Zhao and Liu, Jiaxuan and Han, Yucheng and Chen, Xin and Huang, Zebiao and Fu, Bin and Yu, Gang},
  journal      = {arXiv preprint arXiv:2312.13771},
  year         = {2023},
  month        = {12},
  url          = {https://arxiv.org/abs/2312.13771},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{zhang2024ufo,
  title        = {UFO: A UI-Focused Agent for Windows OS Interaction},
  author       = {Zhang, Chaoyun and Li, Liqun and He, Shilin and Zhang, Xu and Qiao, Bo and Qin, Si and Ma, Minghua and Kang, Yu and Lin, Qingwei and Rajmohan, Saravan and others},
  journal      = {arXiv preprint arXiv:2402.07939},
  year         = {2024},
  month        = {2},
  url          = {https://arxiv.org/abs/2402.07939},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{wang2024mobile,
  title        = {Mobile-Agent: Autonomous multi-modal mobile device agent with visual perception},
  author       = {Wang, Junyang and Xu, Haiyang and Ye, Jiabo and Yan, Ming and Shen, Weizhou and Zhang, Ji and Huang, Fei and Sang, Jitao},
  journal      = {arXiv preprint arXiv:2401.16158},
  year         = {2024},
  month        = {1},
  url          = {https://arxiv.org/abs/2401.16158},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{wu2024copilot,
  title        = {Os-copilot: Towards generalist computer agents with self-improvement},
  author       = {Wu, Zhiyong and Han, Chengcheng and Ding, Zichen and Weng, Zhenmin and Liu, Zhoumianze and Yao, Shunyu and Yu, Tao and Kong, Lingpeng},
  journal      = {arXiv preprint arXiv:2402.07456},
  year         = {2024},
  month        = {2},
  url          = {https://arxiv.org/abs/2402.07456},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{zhou2023webarena,
  title        = {Webarena: A realistic web environment for building autonomous agents},
  author       = {Zhou, Shuyan and Xu, Frank F and Zhu, Hao and Zhou, Xuhui and Lo, Robert and Sridhar, Abishek and Cheng, Xianyi and Bisk, Yonatan and Fried, Daniel and Alon, Uri and others},
  journal      = {arXiv preprint arXiv:2307.13854},
  year         = {2023},
  month        = {7},
  url          = {https://arxiv.org/abs/2307.13854},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{koh2024visualwebarena,
  title        = {Visualwebarena: Evaluating multimodal agents on realistic visual web tasks},
  author       = {Koh, Jing Yu and Lo, Robert and Jang, Lawrence and Duvvur, Vikram and Lim, Ming Chong and Huang, Po-Yu and Neubig, Graham and Zhou, Shuyan and Salakhutdinov, Ruslan and Fried, Daniel},
  journal      = {arXiv preprint arXiv:2401.13649},
  year         = {2024},
  month        = {1},
  url          = {https://arxiv.org/abs/2401.13649},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{yao2022webshop,
  title        = {Webshop: Towards scalable real-world web interaction with grounded language agents},
  author       = {Yao, Shunyu and Chen, Howard and Yang, John and Narasimhan, Karthik},
  journal      = {Advances in Neural Information Processing Systems},
  volume       = {35},
  pages        = {20744--20757},
  year         = {2022},
  month        = {12},
  url          = {https://proceedings.neurips.cc/paper_files/paper/2022/file/82ad13ec01f9fe44c01cb91814fd7b8c-Paper-Conference.pdf},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning, finetuning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{humphreys2022data,
  title        = {A data-driven approach for learning to control computers},
  author       = {Humphreys, Peter C and Raposo, David and Pohlen, Tobias and Thornton, Gregory and Chhaparia, Rachita and Muldal, Alistair and Abramson, Josh and Georgiev, Petko and Santoro, Adam and Lillicrap, Timothy},
  booktitle    = {International Conference on Machine Learning},
  pages        = {9466--9482},
  year         = {2022},
  month        = {7},
  organization = {PMLR},
  url          = {https://arxiv.org/abs/2202.08137},
  environments = {virtual},
  agents       = {finetuning, reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{shi2017world,
  title        = {World of bits: An open-domain platform for web-based agents},
  author       = {Shi, Tianlin and Karpathy, Andrej and Fan, Linxi and Hernandez, Jonathan and Liang, Percy},
  booktitle    = {International Conference on Machine Learning},
  pages        = {3135--3144},
  year         = {2017},
  month        = {8},
  organization = {PMLR},
  url          = {https://proceedings.mlr.press/v70/shi17a/shi17a.pdf},
  environments = {virtual},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{liu2018reinforcement,
  title        = {Reinforcement learning on web interfaces using workflow-guided exploration},
  author       = {Liu, Evan Zheran and Guu, Kelvin and Pasupat, Panupong and Shi, Tianlin and Liang, Percy},
  journal      = {arXiv preprint arXiv:1802.08802},
  year         = {2018},
  month        = {2},
  url          = {https://arxiv.org/abs/1802.08802},
  environments = {virtual},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{branavan2009reinforcement,
  title        = {Reinforcement learning for mapping instructions to actions},
  author       = {Branavan, Satchuthananthavale RK and Chen, Harr and Zettlemoyer, Luke and Barzilay, Regina},
  booktitle    = {Proceedings of the Joint Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing of the AFNLP},
  pages        = {82--90},
  year         = {2009},
  month        = {8},
  url          = {https://aclanthology.org/P09-1010/},
  environments = {virtual},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{toyama2021androidenv,
  title        = {Androidenv: A reinforcement learning platform for android},
  author       = {Toyama, Daniel and Hamel, Philippe and Gergely, Anita and Comanici, Gheorghe and Glaese, Amelia and Ahmed, Zafarali and Jackson, Tyler and Mourad, Shibl and Precup, Doina},
  journal      = {arXiv preprint arXiv:2105.13231},
  year         = {2021},
  month        = {5},
  url          = {https://arxiv.org/abs/2105.13231},
  environments = {virtual},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{li2020mapping,
  title        = {Mapping natural language instructions to mobile UI action sequences},
  author       = {Li, Yang and He, Jiacong and Zhou, Xin and Zhang, Yuan and Baldridge, Jason},
  journal      = {arXiv preprint arXiv:2005.03776},
  year         = {2020},
  month        = {5},
  url          = {https://arxiv.org/abs/2005.03776},
  environments = {virtual},
  agents       = {finetuning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{burns2022dataset,
  title        = {A dataset for interactive vision-language navigation with unknown command feasibility},
  author       = {Burns, Andrea and Arsan, Deniz and Agrawal, Sanjna and Kumar, Ranjitha and Saenko, Kate and Plummer, Bryan A},
  booktitle    = {European Conference on Computer Vision},
  pages        = {312--328},
  year         = {2022},
  month        = {2},
  url          = {https://arxiv.org/abs/2202.02312},
  organization = {Springer},
  environments = {virtual},
  agents       = {finetuning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{deng2024mind2web,
  title        = {Mind2web: Towards a generalist agent for the web},
  author       = {Deng, Xiang and Gu, Yu and Zheng, Boyuan and Chen, Shijie and Stevens, Sam and Wang, Boshi and Sun, Huan and Su, Yu},
  journal      = {Advances in Neural Information Processing Systems},
  volume       = {36},
  year         = {2024},
  month        = {1},
  url          = {https://arxiv.org/abs/2306.06070},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{rawles2023android,
  title        = {Android in the wild: A large-scale dataset for android device control},
  author       = {Rawles, Christopher and Li, Alice and Rodriguez, Daniel and Riva, Oriana and Lillicrap, Timothy},
  journal      = {arXiv preprint arXiv:2307.10088},
  year         = {2023},
  month        = {7},
  url          = {https://arxiv.org/abs/2307.10088},
  environments = {virtual},
  agents       = {finetuning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{allen2007plow,
  title        = {Plow: A collaborative task learning agent},
  author       = {Allen, James and Chambers, Nathanael and Ferguson, George and Galescu, Lucian and Jung, Hyuckchul and Swift, Mary and Taysom, William},
  booktitle    = {AAAI},
  volume       = {7},
  pages        = {1514--1519},
  year         = {2007},
  month        = {7},
  url          = {https://cdn.aaai.org/AAAI/2007/AAAI07-240.pdf},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {human},
  other        = {human_agent},
}

@article{xu2021grounding,
  title        = {Grounding open-domain instructions to automate web support tasks},
  author       = {Xu, Nancy and Masling, Sam and Du, Michael and Campagna, Giovanni and Heck, Larry and Landay, James and Lam, Monica S},
  journal      = {arXiv preprint arXiv:2103.16057},
  year         = {2021},
  month        = {3},
  url          = {https://arxiv.org/abs/2103.16057},
  environments = {virtual, embodied},
  agents       = {finetuning},
  evaluation   = {rule_based},
  other        = {n/a},
}


@article{kolve2017ai2,
  title        = {Ai2-thor: An interactive 3d environment for visual ai},
  author       = {Kolve, Eric and Mottaghi, Roozbeh and Han, Winson and VanderBilt, Eli and Weihs, Luca and Herrasti, Alvaro and Deitke, Matt and Ehsani, Kiana and Gordon, Daniel and Zhu, Yuke and others},
  journal      = {arXiv preprint arXiv:1712.05474},
  year         = {2017},
  month        = {12},
  url          = {https://arxiv.org/abs/1712.05474},
  environments = {virtual},
  agents       = {n/a},
  evaluation   = {rule_based},
  other        = {n/a},
}

@misc{OSWorld,
  title        = {OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments},
  author       = {Tianbao Xie and Danyang Zhang and Jixuan Chen and Xiaochuan Li and Siheng Zhao and Ruisheng Cao and Toh Jing Hua and Zhoujun Cheng and Dongchan Shin and Fangyu Lei and Yitao Liu and Yiheng Xu and Shuyan Zhou and Silvio Savarese and Caiming Xiong and Victor Zhong and Tao Yu},
  journal      = {arXiv preprint arXiv:2404.07972},
  year         = {2024},
  month        = {4},
  url          = {https://arxiv.org/abs/2404.07972},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{drouin2024workarena,
  title        = {WorkArena: How Capable Are Web Agents at Solving Common Knowledge Work Tasks?},
  author       = {Drouin, Alexandre and Gasse, Maxime and Caccia, Massimo and Laradji, Issam H and Del Verme, Manuel and Marty, Tom and Boisvert, L{\'e}o and Thakkar, Megh and Cappart, Quentin and Vazquez, David and others},
  journal      = {arXiv preprint arXiv:2403.07718},
  year         = {2024},
  month        = {3},
  url          = {https://arxiv.org/abs/2403.07718},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{puig2018virtualhome,
  title        = {Virtualhome: Simulating household activities via programs},
  author       = {Puig, Xavier and Ra, Kevin and Boben, Marko and Li, Jiaman and Wang, Tingwu and Fidler, Sanja and Torralba, Antonio},
  booktitle    = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages        = {8494--8502},
  year         = {2018},
  month        = {6},
  url          = {https://openaccess.thecvf.com/content_cvpr_2018/html/Puig_VirtualHome_Simulating_Household_CVPR_2018_paper.html},
  environments = {virtual, embodied},
  agents       = {n/a},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{kuttler2020nethack,
  title        = {The nethack learning environment},
  author       = {K{\"u}ttler, Heinrich and Nardelli, Nantas and Miller, Alexander and Raileanu, Roberta and Selvatici, Marco and Grefenstette, Edward and Rockt{\"a}schel, Tim},
  journal      = {Advances in Neural Information Processing Systems},
  volume       = {33},
  pages        = {7671--7684},
  year         = {2020},
  month        = {4},
  url          = {https://arxiv.org/abs/2006.13760},
  environments = {virtual},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{cote2019textworld,
  title        = {Textworld: A learning environment for text-based games},
  author       = {C{\^o}t{\'e}, Marc-Alexandre and K{\'a}d{\'a}r, Akos and Yuan, Xingdi and Kybartas, Ben and Barnes, Tavian and Fine, Emery and Moore, James and Hausknecht, Matthew and El Asri, Layla and Adada, Mahmoud and others},
  booktitle    = {Computer Games: 7th Workshop, CGW 2018, Held in Conjunction with the 27th International Conference on Artificial Intelligence, IJCAI 2018, Stockholm, Sweden, July 13, 2018, Revised Selected Papers 7},
  pages        = {41--75},
  year         = {2019},
  month        = {1},
  organization = {Springer},
  url          = {https://arxiv.org/abs/1806.11532},
  environments = {virtual, text},
  agents       = {n/a},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{wang2022scienceworld,
  title        = {Scienceworld: Is your agent smarter than a 5th grader?},
  author       = {Wang, Ruoyao and Jansen, Peter and C{\^o}t{\'e}, Marc-Alexandre and Ammanabrolu, Prithviraj},
  journal      = {arXiv preprint arXiv:2203.07540},
  year         = {2022},
  month        = {3},
  url          = {https://arxiv.org/abs/2203.07540},
  environments = {virtual},
  agents       = {finetuning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{fan2022minedojo,
  title        = {Minedojo: Building open-ended embodied agents with internet-scale knowledge},
  author       = {Fan, Linxi and Wang, Guanzhi and Jiang, Yunfan and Mandlekar, Ajay and Yang, Yuncong and Zhu, Haoyi and Tang, Andrew and Huang, De-An and Zhu, Yuke and Anandkumar, Anima},
  journal      = {Advances in Neural Information Processing Systems},
  volume       = {35},
  pages        = {18343--18362},
  year         = {2022},
  month        = {8},
  url          = {https://arxiv.org/abs/2206.08853},
  environments = {virtual, embodied},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

# human-virtual agent interaction, IDE, SE, Programming assistant, or others

@inproceedings{nam2024using,
  title        = {Using an llm to help with code understanding},
  author       = {Nam, Daye and Macvean, Andrew and Hellendoorn, Vincent and Vasilescu, Bogdan and Myers, Brad},
  booktitle    = {Proceedings of the IEEE/ACM 46th International Conference on Software Engineering},
  pages        = {1--13},
  year         = {2024},
  month        = {4},
  url          = {https://doi.org/10.1145/3597503.3639187},
  environments = {mixed_objectives, virtual},
  agents       = {prompting_and_in_context_learning, two_agents},
  evaluation   = {qualitative, human, rule_based},
  other        = {human_agent, more_omniscient},
}

@inproceedings{mozannar2023simulating,
  title        = {Simulating Iterative Human-AI Interaction in Programming with LLMs},
  author       = {Mozannar, Hussein and Chen, Valerie and Wei, Dennis and Sattigeri, Prasanna and Nagireddy, Manish and Das, Subhro and Talwalkar, Ameet and Sontag, David},
  booktitle    = {NeurIPS 2023 Workshop on Instruction Tuning and Instruction Following},
  year         = {2023},
  month        = {11},
  url          = {https://openreview.net/pdf?id=0nRcZeeE5f},
  environments = {mixed_objectives, virtual},
  agents       = {two_agents, prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {simulated_humans, more_omniscient},
}


@inproceedings{Ross2023-sb,
  title        = {The Programmer's Assistant: Conversational Interaction with a Large Language Model for Software Development},
  booktitle    = {Proceedings of the 28th International Conference on Intelligent User Interfaces},
  author       = {Ross, Steven I and Martinez, Fernando and Houde, Stephanie and Muller, Michael and Weisz, Justin D},
  publisher    = {Association for Computing Machinery},
  pages        = {491--514},
  series       = {IUI '23},
  month        = {3},
  year         = {2023},
  url          = {https://doi.org/10.1145/3581641.3584037},
  address      = {New York, NY, USA},
  location     = {Sydney, NSW, Australia},
  isbn         = {9798400701061},
  doi          = {10.1145/3581641.3584037},
  environments = {mixed_objectives, virtual},
  agents       = {prompting_and_in_context_learning, two_agents},
  evaluation   = {qualitative, human, rule_based},
  other        = {human_agent, more_omniscient},
}


#### Robotics
@article{environments/robotics,
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
  author = {specical entry},
}

@inproceedings{li2022seehearfeel,
  title        = {See, Hear, and Feel: Smart Sensory Fusion for Robotic Manipulation},
  author       = {Hao Li and Yizhi Zhang and Junzhe Zhu and Shaoxiong Wang and Michelle A. Lee and Huazhe Xu and Edward Adelson and Li Fei-Fei and Ruohan Gao and Jiajun Wu},
  booktitle    = {CoRL},
  year         = {2022},
  month        = {12},
  url          = {https://ai.stanford.edu/~rhgao/see_hear_feel/},
  environments = {embodied, robotics},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{thomason:corl19,
  title        = {Vision-and-Dialog Navigation},
  author       = {Jesse Thomason and Michael Murray and Maya Cakmak and Luke Zettlemoyer},
  booktitle    = {Conference on Robot Learning (CoRL)},
  year         = {2019},
  month        = {10},
  url          = {https://cvdn.dev/},
  environments = {embodied, robotics},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{shah2023mutex,
  title={MUTEX: Learning Unified Policies from Multimodal Task Specifications},
  author={Rutav Shah and Roberto Mart{\'\i}n-Mart{\'\i}n and Yuke Zhu},
  year={2023},
  month={7},
  booktitle={7th Annual Conference on Robot Learning},
  url={https://openreview.net/forum?id=PwqiqaaEzJ},
  environments={embodied, robotics},
  agents={prompting_and_in_context_learning, pretraining},
  evaluation={rule_based},
  other={n/a},
}

@misc{jain2024vid2robot,
  title         = {Vid2Robot: End-to-end Video-conditioned Policy Learning with Cross-Attention Transformers},
  author        = {Vidhi Jain and Maria Attarian and Nikhil J Joshi and Ayzaan Wahid and Danny Driess and Quan Vuong and Pannag R Sanketi and Pierre Sermanet and Stefan Welker and Christine Chan and Igor Gilitschenski and Yonatan Bisk and Debidatta Dwibedi},
  year          = {2024},
  month         = {3},
  eprint        = {2403.12943},
  archivePrefix = {arXiv},
  primaryClass  = {cs.RO},
  url           = {https://vid2robot.github.io/},
  environments  = {robotics},
  agents        = {prompting_and_in_context_learning, pretraining},
  evaluation    = {human, qualitative},
  other         = {n/a},
}

@inproceedings{jain2022transformers,
  title        = {Transformers Are Adaptable Task Planners},
  author       = {Vidhi Jain and Yixin Lin and Eric Undersander and Yonatan Bisk and Akshara Rai},
  booktitle    = {6th Annual Conference on Robot Learning},
  year         = {2022},
  month        = {12},
  url          = {https://openreview.net/forum?id=Eal_lL08v_l},
  environments = {embodied, robotics},
  agents       = {prompting_and_in_context_learning, pretraining},
  evaluation   = {rule_based},
  other        = {n/a},
}

@misc{li2024behavior1k,
  title         = {BEHAVIOR-1K: A Human-Centered, Embodied AI Benchmark with 1,000 Everyday Activities and Realistic Simulation},
  author        = {Chengshu Li and Ruohan Zhang and Josiah Wong and Cem Gokmen and Sanjana Srivastava and Roberto Mart{\'\i}n-Mart{\'\i}n and Chen Wang and Gabrael Levine and Wensi Ai and Benjamin Martinez and Hang Yin and Michael Lingelbach and Minjune Hwang and Ayano Hiranaka and Sujay Garlanka and Arman Aydin and Sharon Lee and Jiankai Sun and Mona Anvari and Manasi Sharma and Dhruva Bansal and Samuel Hunter and Kyu-Young Kim and Alan Lou and Caleb R Matthews and Ivan Villa-Renteria and Jerry Huayang Tang and Claire Tang and Fei Xia and Yunzhu Li and Silvio Savarese and Hyowon Gweon and C. Karen Liu and Jiajun Wu and Li Fei-Fei},
  year          = {2024},
  month         = {3},
  eprint        = {2403.09227},
  archivePrefix = {arXiv},
  primaryClass  = {cs.RO},
  url           = {https://behavior.stanford.edu/behavior-1k},
  environments  = {embodied, robotics},
  agents        = {n/a},
  evaluation    = {rule_based},
  other         = {simulated_humans, fully_omniscient},
}

@article{gan2020threedworld,
  title        = {Threedworld: A platform for interactive multi-modal physical simulation},
  author       = {Gan, Chuang and Schwartz, Jeremy and Alter, Seth and Mrowca, Damian and Schrimpf, Martin and Traer, James and De Freitas, Julian and Kubilius, Jonas and Bhandwaldar, Abhishek and Haber, Nick and others},
  journal      = {arXiv preprint arXiv:2007.04954},
  year         = {2020},
  month        = {7},
  url          = {https://www.threedworld.org/},
  environments = {embodied, robotics},
  agents       = {n/a},
  evaluation   = {rule_based},
  other        = {simulated_humans, fully_omniscient},
}

@inproceedings{lin2023gestureinformed,
  title        = {Gesture-Informed Robot Assistance via Foundation Models},
  author       = {Li-Heng Lin and Yuchen Cui and Yilun Hao and Fei Xia and Dorsa Sadigh},
  booktitle    = {7th Annual Conference on Robot Learning},
  year         = {2023},
  month        = {6},
  url          = {https://openreview.net/forum?id=Ffn8Z4Q-zU},
  environments = {robotics},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {human, qualitative},
  other        = {n/a},
}

@misc{khazatsky2024droid,
  title         = {DROID: A Large-Scale In-The-Wild Robot Manipulation Dataset},
  author        = {Alexander Khazatsky and Karl Pertsch and Suraj Nair and Ashwin Balakrishna and Sudeep Dasari and Siddharth Karamcheti and Soroush Nasiriany and Mohan Kumar Srirama and Lawrence Yunliang Chen and Kirsty Ellis and Peter David Fagan and Joey Hejna and Masha Itkina and Marion Lepert and Yecheng Jason Ma and Patrick Tree Miller and Jimmy Wu and Suneel Belkhale and Shivin Dass and Huy Ha and Arhan Jain and Abraham Lee and Youngwoon Lee and Marius Memmel and Sungjae Park and Ilija Radosavovic and Kaiyuan Wang and Albert Zhan and Kevin Black and Cheng Chi and Kyle Beltran Hatch and Shan Lin and Jingpei Lu and Jean Mercat and Abdul Rehman and Pannag R Sanketi and Archit Sharma and Cody Simpson and Quan Vuong and Homer Rich Walke and Blake Wulfe and Ted Xiao and Jonathan Heewon Yang and Arefeh Yavary and Tony Z. Zhao and Christopher Agia and Rohan Baijal and Mateo Guaman Castro and Daphne Chen and Qiuyu Chen and Trinity Chung and Jaimyn Drake and Ethan Paul Foster and Jensen Gao and David Antonio Herrera and Minho Heo and Kyle Hsu and Jiaheng Hu and Donovon Jackson and Charlotte Le and Yunshuang Li and Kevin Lin and Roy Lin and Zehan Ma and Abhiram Maddukuri and Suvir Mirchandani and Daniel Morton and Tony Nguyen and Abigail O'Neill and Rosario Scalise and Derick Seale and Victor Son and Stephen Tian and Emi Tran and Andrew E. Wang and Yilin Wu and Annie Xie and Jingyun Yang and Patrick Yin and Yunchu Zhang and Osbert Bastani and Glen Berseth and Jeannette Bohg and Ken Goldberg and Abhinav Gupta and Abhishek Gupta and Dinesh Jayaraman and Joseph J Lim and Jitendra Malik and Roberto Mart{\'\i}n-Mart{\'\i}n and Subramanian Ramamoorthy and Dorsa Sadigh and Shuran Song and Jiajun Wu and Michael C. Yip and Yuke Zhu and Thomas Kollar and Sergey Levine and Chelsea Finn},
  year          = {2024},
  month         = {3},
  environments  = {robotics},
  eprint        = {2403.12945},
  archivePrefix = {arXiv},
  primaryClass  = {cs.RO},
  url           = {https://arxiv.org/abs/2403.12945},
  agents        = {n/a},
  evaluation    = {human, qualitative},
  other         = {human_agent},
}

@misc{open_x_embodiment_rt_x_2023,
  title         = {Open {X-E}mbodiment: Robotic Learning Datasets and {RT-X} Models},
  author        = {{Open X-Embodiment Collaboration} and Abby O'Neill and Abdul Rehman and Abhiram Maddukuri and Abhishek Gupta and Abhishek Padalkar and Abraham Lee and Acorn Pooley and Agrim Gupta and Ajay Mandlekar and Ajinkya Jain and Albert Tung and Alex Bewley and Alex Herzog and Alex Irpan and Alexander Khazatsky and Anant Rai and Anchit Gupta and Andrew Wang and Anikait Singh and Animesh Garg and Aniruddha Kembhavi and Annie Xie and Anthony Brohan and Antonin Raffin and Archit Sharma and Arefeh Yavary and Arhan Jain and Ashwin Balakrishna and Ayzaan Wahid and Ben Burgess-Limerick and Beomjoon Kim and Bernhard Schölkopf and Blake Wulfe and Brian Ichter and Cewu Lu and Charles Xu and Charlotte Le and Chelsea Finn and Chen Wang and Chenfeng Xu and Cheng Chi and Chenguang Huang and Christine Chan and Christopher Agia and Chuer Pan and Chuyuan Fu and Coline Devin and Danfei Xu and Daniel Morton and Danny Driess and Daphne Chen and Deepak Pathak and Dhruv Shah and Dieter Büchler and Dinesh Jayaraman and Dmitry Kalashnikov and Dorsa Sadigh and Edward Johns and Ethan Foster and Fangchen Liu and Federico Ceola and Fei Xia and Feiyu Zhao and Freek Stulp and Gaoyue Zhou and Gaurav S. Sukhatme
                   and Gautam Salhotra and Ge Yan and Gilbert Feng and Giulio Schiavi and Glen Berseth and Gregory Kahn and Guanzhi Wang and Hao Su and Hao-Shu Fang and Haochen Shi and Henghui Bao and Heni Ben Amor and Henrik I Christensen and Hiroki Furuta and Homer Walke and Hongjie Fang and Huy Ha and Igor Mordatch and Ilija Radosavovic and Isabel Leal and Jacky Liang and Jad Abou-Chakra and Jaehyung Kim and Jaimyn Drake and Jan Peters and Jan Schneider and Jasmine Hsu and Jeannette Bohg and Jeffrey Bingham and Jeffrey Wu and Jensen Gao and Jiaheng Hu and Jiajun Wu and Jialin Wu and Jiankai Sun and Jianlan Luo and Jiayuan Gu and Jie Tan and Jihoon Oh and Jimmy Wu and Jingpei Lu and Jingyun Yang and Jitendra Malik and João Silvério and Joey Hejna and Jonathan Booher and Jonathan Tompson and Jonathan Yang and Jordi Salvador and Joseph J. Lim and Junhyek Han and Kaiyuan Wang and Kanishka Rao and Karl Pertsch and Karol Hausman and Keegan Go and Keerthana Gopalakrishnan and Ken Goldberg and Kendra Byrne and Kenneth Oslund and Kento Kawaharazuka and Kevin Black and Kevin Lin and Kevin Zhang and Kiana Ehsani and Kiran Lekkala and Kirsty Ellis and Krishan Rana and Krishnan Srinivasan and Kuan Fang and Kunal Pratap Singh and Kuo-Hao Zeng and Kyle Hatch and Kyle Hsu and Laurent Itti and Lawrence Yunliang Chen and Lerrel Pinto and Li Fei-Fei and Liam Tan and Linxi "Jim" Fan and Lionel Ott and Lisa Lee and Luca Weihs and Magnum Chen and Marion Lepert and Marius Memmel and Masayoshi Tomizuka and Masha Itkina and Mateo Guaman Castro and Max Spero and Maximilian Du and Michael Ahn and Michael C. Yip
                   and Mingtong Zhang and Mingyu Ding and Minho Heo and Mohan Kumar Srirama and Mohit Sharma and Moo Jin Kim and Naoaki Kanazawa and Nicklas Hansen and Nicolas Heess and Nikhil J Joshi and Niko Suenderhauf and Ning Liu and Norman Di Palo and Nur Muhammad Mahi Shafiullah and Oier Mees and Oliver Kroemer and Osbert Bastani and Pannag R Sanketi and Patrick "Tree" Miller and Patrick Yin and Paul Wohlhart and Peng Xu and Peter David Fagan and Peter Mitrano and Pierre Sermanet and Pieter Abbeel and Priya Sundaresan and Qiuyu Chen and Quan Vuong and Rafael Rafailov and Ran Tian and Ria Doshi and Roberto Mart{\'i}n-Mart{\'i}n and Rohan Baijal and Rosario Scalise and Rose Hendrix and Roy Lin and Runjia Qian and Ruohan Zhang and Russell Mendonca and Rutav Shah and Ryan Hoque and Ryan Julian and Samuel Bustamante and Sean Kirmani and Sergey Levine and Shan Lin and Sherry Moore and Shikhar Bahl and Shivin Dass and Shubham Sonawani and Shuran Song and Sichun Xu and Siddhant Haldar and Siddharth Karamcheti and Simeon Adebola and Simon Guist and Soroush Nasiriany and Stefan Schaal and Stefan Welker and Stephen Tian and Subramanian Ramamoorthy and Sudeep Dasari and Suneel Belkhale and Sungjae Park and Suraj Nair and Suvir Mirchandani and Takayuki Osa and Tanmay Gupta and Tatsuya Harada and Tatsuya Matsushima and Ted Xiao and Thomas Kollar and Tianhe Yu and Tianli Ding and Todor Davchev and Tony Z. Zhao
                   and Travis Armstrong and Trevor Darrell and Trinity Chung and Vidhi Jain and Vincent Vanhoucke and Wei Zhan and Wenxuan Zhou and Wolfram Burgard and Xi Chen and Xiaolong Wang and Xinghao Zhu and Xinyang Geng and Xiyuan Liu and Xu Liangwei and Xuanlin Li and Yao Lu and Yecheng Jason Ma and Yejin Kim and Yevgen Chebotar and Yifan Zhou and Yifeng Zhu and Yilin Wu and Ying Xu and Yixuan Wang and Yonatan Bisk and Yoonyoung Cho and Youngwoon Lee and Yuchen Cui and Yue Cao and Yueh-Hua Wu and Yujin Tang and Yuke Zhu and Yunchu Zhang and Yunfan Jiang and Yunshuang Li and Yunzhu Li and Yusuke Iwasawa and Yutaka Matsuo and Zehan Ma and Zhuo Xu and Zichen Jeff Cui and Zichen Zhang and Zipeng Lin},
  url           = {https://arxiv.org/abs/2310.08864},
  year          = {2023},
  month         = {9},
  eprint        = {2310.08864},
  archivePrefix = {arXiv},
  primaryClass  = {cs.RO},
  environments  = {robotics},
  agents        = {n/a},
  evaluation    = {human, qualitative},
  other         = {human_agent},
}

@inproceedings{arenas2023how,
  title        = {How to Prompt Your Robot: A PromptBook for Manipulation Skills with Code as Policies},
  author       = {Montserrat Gonzalez Arenas and Ted Xiao and Sumeet Singh and Vidhi Jain and Allen Z. Ren and Quan Vuong and Jake Varley and Alexander Herzog and Isabel Leal and Sean Kirmani and Dorsa Sadigh and Vikas Sindhwani and Kanishka Rao and Jacky Liang and Andy Zeng},
  booktitle    = {2nd Workshop on Language and Robot Learning: Language as Grounding},
  year         = {2023},
  month        = {9},
  url          = {https://openreview.net/forum?id=T8AiZj1QdN},
  environments = {robotics},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {human, qualitative},
  other        = {human_agent},
}

@article{hu2023Toward,
  author       = {Yafei Hu and Quanting Xie and Vidhi Jain and Jonathan Francis and Jay Patrikar
                  and Nikhil Keetha and Seungchan Kim and Yaqi Xie and Tianyi Zhang and Shibo Zhao
                  and Yu-Quan Chong and Chen Wang and Katia Sycara and Matthew Johnson-Roberson
                  and Dhruv Batra and Xiaolong Wang and Sebastian Scherer and Zsolt Kira and
                  Fei Xia and Yonatan Bisk},
  title        = {Toward General-Purpose Robots via Foundation Models: A Survey and Meta-Analysis},
  journal      = {arXiv preprint arXiv:2312.08782},
  url          = {https://robotics-fm-survey.github.io/},
  year         = {2023},
  month        = {12},
  environments = {robotics},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {human, qualitative},
  other        = {human_agent},
}

@inproceedings{yenamandra2023homerobot,
  title        = {HomeRobot: Open-Vocabulary Mobile Manipulation},
  author       = {Sriram Yenamandra and Arun Ramachandran and Karmesh Yadav and Austin S Wang and Mukul Khanna and Theophile Gervet and Tsung-Yen Yang and Vidhi Jain and Alexander Clegg and John M Turner and Zsolt Kira and Manolis Savva and Angel X Chang and Devendra Singh Chaplot and Dhruv Batra and Roozbeh Mottaghi and Yonatan Bisk and Chris Paxton},
  booktitle    = {7th Annual Conference on Robot Learning},
  year         = {2023},
  month        = {6},
  url          = {https://openreview.net/forum?id=b-cto-fetlz},
  environments = {robotics},
  agents       = {n/a},
  evaluation   = {human, qualitative},
  other        = {human_agent},
}

@article{kwon2023toward,
  title        = {Toward Grounded Commonsense Reasoning},
  author       = {Kwon, Minae and Hu, Hengyuan and Myers, Vivek and Karamcheti, Siddharth and Dragan, Anca and Sadigh, Dorsa},
  journal      = {arXiv preprint arXiv:2306.08651},
  year         = {2023},
  month        = {11},
  url          = {https://arxiv.org/abs/2306.08651},
  environments = {robotics},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {human, qualitative},
  other        = {human_agent},
}

@misc{shin2023benchmarks,
  title         = {Benchmarks and Algorithms for Offline Preference-Based Reward Learning},
  author        = {Daniel Shin and Anca D. Dragan and Daniel S. Brown},
  year          = {2023},
  month         = {3},
  eprint        = {2301.01392},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/pdf/2301.01392.pdf},
  environments  = {robotics},
  agents        = {n/a},
  evaluation    = {human, qualitative},
  other         = {human_agent},
}

@inproceedings{pmlr-v205-xiong23a,
  title        = {RoboTube: Learning Household Manipulation from Human Videos with Simulated Twin Environments},
  author       = {Xiong, Haoyu and Fu, Haoyuan and Zhang, Jieyi and Bao, Chen and Zhang, Qiang and Huang, Yongxi and Xu, Wenqiang and Garg, Animesh and Lu, Cewu},
  booktitle    = {Proceedings of The 6th Conference on Robot Learning},
  editor       = {Liu, Karen and Kulic, Dana and Ichnowski, Jeff},
  series       = {Proceedings of Machine Learning Research},
  volume       = {205},
  pages        = {1--10},
  publisher    = {PMLR},
  year         = {2023},
  month        = {12},
  pdf          = {https://proceedings.mlr.press/v205/xiong23a/xiong23a.pdf},
  url          = {https://proceedings.mlr.press/v205/xiong23a.html},
  environments = {implicit_objectives, robotics},
  agents       = {reinforcement_learning, agents_with_memory},
  evaluation   = {human, rule_based},
  other        = {simulated_humans},
}


@inproceedings{saycan2022arxiv,
  title        = {Do As I Can and Not As I Say: Grounding Language in Robotic Affordances},
  author       = {Michael Ahn and Anthony Brohan and Noah Brown and Yevgen Chebotar and Omar Cortes and Byron David and Chelsea Finn and Chuyuan Fu and Keerthana Gopalakrishnan and Karol Hausman and Alex Herzog and Daniel Ho and Jasmine Hsu and Julian Ibarz and Brian Ichter and Alex Irpan and Eric Jang and Rosario Jauregui Ruano and Kyle Jeffrey and Sally Jesmonth and Nikhil Joshi and Ryan Julian and Dmitry Kalashnikov and Yuheng Kuang and Kuang-Huei Lee and Sergey Levine and Yao Lu and Linda Luu and Carolina Parada and Peter Pastor and Jornell Quiambao and Kanishka Rao and Jarek Rettinghouse and Diego Reyes and Pierre Sermanet and Nicolas Sievers and Clayton Tan and Alexander Toshev and Vincent Vanhoucke and Fei Xia and Ted Xiao and Peng Xu and Sichun Xu and Mengyuan Yan and Andy Zeng},
  booktitle    = {arXiv preprint arXiv:2204.01691},
  year         = {2022},
  month        = {8},
  url          = {https://say-can.github.io/},
  environments = {mixed_objectives, implicit_objectives, robotics},
  agents       = {finetuning, reinforcement_learning, agents_with_memory},
  evaluation   = {human, rule_based, model_based},
  other        = {simulated_humans},
}

@inproceedings{huang2022inner,
  title        = {Inner Monologue: Embodied Reasoning through Planning with Language Models},
  author       = {Wenlong Huang and Fei Xia and Ted Xiao and Harris Chan and Jacky Liang and Pete Florence and Andy Zeng and Jonathan Tompson and Igor Mordatch and Yevgen Chebotar and Pierre Sermanet and Noah Brown and Tomas Jackson and Linda Luu and Sergey Levine and Karol Hausman and Brian Ichter},
  booktitle    = {arXiv preprint arXiv:2207.05608},
  year         = {2022},
  month        = {6},
  url          = {https://arxiv.org/abs/2207.05608},
  environments = {mixed_objectives, implicit_objectives, robotics},
  agents       = {finetuning, reinforcement_learning, agents_with_memory},
  evaluation   = {human, rule_based, model_based},
  other        = {simulated_humans},
}

@inproceedings{Wang2023One,
  title        = {One Policy to Dress Them All: Learning to Dress People with Diverse Poses and Garments},
  author       = {Wang, Yufei and Sun, Zhanyi and Erickson, Zackory and Held, David},
  booktitle    = {Robotics: Science and Systems (RSS)},
  year         = {2023},
  month        = {6},
  url          = {https://arxiv.org/abs/2306.12372},
  environments = {robotics},
  agents       = {reinforcement_learning},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@misc{wang2023cogail,
  title         = {Co-GAIL: Learning Diverse Strategies for Human-Robot Collaboration},
  author        = {Chen Wang and Claudia Pérez-D'Arpino and Danfei Xu and Li Fei-Fei and C. Karen Liu and Silvio Savarese},
  year          = {2023},
  month         = {9},
  eprint        = {2108.06038},
  archivePrefix = {arXiv},
  primaryClass  = {cs.RO},
  url           = {https://arxiv.org/abs/2108.06038},
  environments  = {collaboration, mixed_objectives, robotics},
  agents        = {two_agents, reinforcement_learning},
  evaluation    = {human},
  other         = {human_agent, simulated_humans},
}

@misc{shi2024yell,
  title         = {Yell At Your Robot: Improving On-the-Fly from Language Corrections},
  author        = {Lucy Xiaoyang Shi and Zheyuan Hu and Tony Z. Zhao and Archit Sharma and Karl Pertsch and Jianlan Luo and Sergey Levine and Chelsea Finn},
  year          = {2024},
  month         = {3},
  eprint        = {2403.12910},
  archivePrefix = {arXiv},
  primaryClass  = {cs.RO},
  url           = {https://arxiv.org/abs/2403.12910},
  environments  = {collaboration, mixed_objectives, robotics},
  agents        = {two_agents, finetuning, reinforcement_learning, agents_with_memory},
  evaluation    = {human},
  other         = {human_agent},
}

@article{sheridan2016human,
  title        = {Human--robot interaction: status and challenges},
  author       = {Sheridan, Thomas B},
  journal      = {Human factors},
  volume       = {58},
  number       = {4},
  pages        = {525--532},
  year         = {2016},
  month        = {4},
  publisher    = {SAGE Publications Sage CA: Los Angeles, CA},
  url          = {https://journals.sagepub.com/doi/10.1177/0018720816644364},
  environments = {collaboration, mixed_objectives, robotics},
  agents       = {two_agents, finetuning, reinforcement_learning},
  evaluation   = {human},
  other        = {human_agent},
}


@article{onnasch2021taxonomy,
  title        = {A taxonomy to structure and analyze human--robot interaction},
  author       = {Onnasch, Linda and Roesler, Eileen},
  journal      = {International Journal of Social Robotics},
  volume       = {13},
  number       = {4},
  pages        = {833--849},
  year         = {2021},
  month        = {6},
  publisher    = {Springer},
  url          = {https://link.springer.com/article/10.1007/s12369-020-00666-5},
  environments = {collaboration, mixed_objectives, robotics},
  agents       = {two_agents},
  evaluation   = {human},
  other        = {human_agent},
}

@article{robinson2023robotic,
  title        = {Robotic vision for human-robot interaction and collaboration: A survey and systematic review},
  author       = {Robinson, Nicole and Tidd, Brendan and Campbell, Dylan and Kuli{\'c}, Dana and Corke, Peter},
  journal      = {ACM Transactions on Human-Robot Interaction},
  volume       = {12},
  number       = {1},
  pages        = {1--66},
  year         = {2023},
  month        = {7},
  publisher    = {ACM New York, NY},
  url          = {https://arxiv.org/abs/2307.15363},
  environments = {collaboration, mixed_objectives, implicit_objectives, robotics},
  agents       = {two_agents, agent_teams, agents_with_personas},
  evaluation   = {human, rule_based},
  other        = {human_agent, simulated_humans},
}

@article{dahiya2023survey,
  title        = {A survey of multi-agent Human--Robot Interaction systems},
  author       = {Dahiya, Abhinav and Aroyo, Alexander M and Dautenhahn, Kerstin and Smith, Stephen L},
  journal      = {Robotics and Autonomous Systems},
  volume       = {161},
  pages        = {104335},
  year         = {2022},
  month        = {10},
  publisher    = {Elsevier},
  url          = {https://arxiv.org/abs/2212.05286},
  environments = {collaboration, mixed_objectives, robotics},
  agents       = {two_agents, more_than_three_agents, agent_teams},
  evaluation   = {human},
  other        = {human_agent},
}

@article{10.1145/3570169,
  author       = {Urakami, Jacqueline and Seaborn, Katie},
  title        = {Nonverbal Cues in Human Robot Interaction: A Communication Studies Perspective},
  journal      = {J. Hum.-Robot Interact.},
  volume       = {12},
  number       = {2},
  articleno    = {22},
  numpages     = {21},
  year         = {2023},
  month        = {3},
  issue_date   = {June 2023},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  doi          = {10.1145/3570169},
  url          = {https://doi.org/10.1145/3570169},
  keywords     = {nonverbal codes, communication studies, human robot interaction, nonverbal communication, Robotics},
  environments = {collaboration, mixed_objectives, implicit_objectives, robotics},
  agents       = {two_agents},
  evaluation   = {human},
  other        = {human_agent},
}

@article{10.1145/3571718,
  author       = {Winkle, Katie and Lagerstedt, Erik and Torre, Ilaria and Offenwanger, Anna},
  title        = {15 Years of (Who)man Robot Interaction: Reviewing the H in Human-Robot Interaction},
  journal      = {J. Hum.-Robot Interact.},
  volume       = {12},
  number       = {3},
  articleno    = {28},
  numpages     = {28},
  year         = {2023},
  month        = {4},
  issue_date   = {September 2023},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  doi          = {10.1145/3571718},
  url          = {https://doi.org/10.1145/3571718},
  abstract     = {Recent work identified a concerning trend of disproportional gender representation in research participants in Human–Computer Interaction (HCI). Motivated by the fact that Human–Robot Interaction (HRI) shares many participant practices with HCI, we explored whether this trend is mirrored in our field. By producing a dataset covering participant gender representation in all 684 full papers published at the HRI conference from 2006–2021, we identify current trends in HRI research participation. We find an over-representation of men in research participants to date, as well as inconsistent and/or incomplete gender reporting, which typically engages in a binary treatment of gender at odds with published best practice guidelines. We further examine if and how participant gender has been considered in user studies to date, in-line with current discourse surrounding the importance and/or potential risks of gender based analyses. Finally, we complement this with a survey of HRI researchers to examine correlations between who is doing with the who is taking part, to further reflect on factors which seemingly influence gender bias in research participation across different sub-fields of HRI. Through our analysis, we identify areas for improvement, but also reason for optimism, and derive some practical suggestions for HRI researchers going forward.},
  keywords     = {Gender, systematic review, user study methodologies, participant recruitment, inclusivity},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {human},
  other        = {human_agent},
}


@inproceedings{de2018towards,
  title        = {Towards a robust interactive and learning social robot},
  author       = {De Jong, Michiel and Zhang, Kevin and Roth, Aaron M and Rhodes, Travers and Schmucker, Robin and Zhou, Chenghui and Ferreira, Sofia and Cartucho, Jo{\~a}o and Veloso, Manuela},
  booktitle    = {Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems},
  pages        = {883--891},
  year         = {2018},
  month        = {7},
  url          = {https://www.ifaamas.org/Proceedings/aamas2018/pdfs/p883.pdf},
  environments = {implicit_objectives, robotics},
  agents       = {two_agents},
  evaluation   = {rule_based},
  other        = {simulated_humans, fully_omniscient},
}

@inproceedings{madan2024rabbit,
  title        = {RABBIT: A Robot-Assisted Bed Bathing System with Multimodal Perception and Integrated Compliance},
  author       = {Madan, Rishabh and Valdez, Skyler and Kim, David and Fang, Sujie and Zhong, Luoyan and Virtue, Diego T and Bhattacharjee, Tapomayukh},
  booktitle    = {Proceedings of the 2024 ACM/IEEE International Conference on Human-Robot Interaction},
  pages        = {472--481},
  url          = {https://dl.acm.org/doi/abs/10.1145/3610977.3634989},
  year         = {2024},
  month        = {3},
  environments = {robotics},
  agents       = {n/a},
  evaluation   = {human, qualitative},
  other        = {human_agent, health},
}

@inproceedings{ye2022rcare,
  title        = {Rcare world: A human-centric simulation world for caregiving robots},
  author       = {Ye, Ruolin and Xu, Wenqiang and Fu, Haoyuan and Jenamani, Rajat Kumar and Nguyen, Vy and Lu, Cewu and Dimitropoulou, Katherine and Bhattacharjee, Tapomayukh},
  booktitle    = {2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
  pages        = {33--40},
  url          = {https://ieeexplore.ieee.org/abstract/document/9982244},
  year         = {2022},
  month        = {10},
  organization = {IEEE},
  environments = {embodied, robotics},
  agents       = {two_agents},
  evaluation   = {human},
  other        = {human_agent, health},
}


### Modeling

#### In-context Learning
@article{modeling/in-context-learning,
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
  author = {specical entry}
}

@article{zhi2024pragmatic,
  title        = {Pragmatic Instruction Following and Goal Assistance via Cooperative Language-Guided Inverse Planning},
  author       = {Zhi-Xuan, Tan and Ying, Lance and Mansinghka, Vikash and Tenenbaum, Joshua B},
  journal      = {arXiv preprint arXiv:2402.17930},
  year         = {2024},
  month        = {2},
  url          = {https://arxiv.org/abs/2402.17930},
  environments = {collaboration, mixed_objectives, implicit_objectives, embodied},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {simulated_humans},
}

@article{fu2024autoguide,
  title        = {AutoGuide: Automated Generation and Selection of State-Aware Guidelines for Large Language Model Agents},
  author       = {Fu, Yao and Kim, Dong-Ki and Kim, Jaekyeom and Sohn, Sungryull and Logeswaran, Lajanugen and Bae, Kyunghoon and Lee, Honglak},
  journal      = {arXiv preprint arXiv:2403.08978},
  year         = {2024},
  month        = {3},
  url          = {https://arxiv.org/abs/2403.08978},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@misc{wang2023voyager,
  title         = {Voyager: An Open-Ended Embodied Agent with Large Language Models},
  author        = {Guanzhi Wang and Yuqi Xie and Yunfan Jiang and Ajay Mandlekar and Chaowei Xiao and Yuke Zhu and Linxi Fan and Anima Anandkumar},
  year          = {2023},
  month         = {5},
  eprint        = {2305.16291},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2305.16291},
  environments  = {mixed_objectives, implicit_objectives, embodied},
  agents        = {prompting_and_in_context_learning, agents_with_memory},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@misc{kim2023language,
  title         = {Language Models can Solve Computer Tasks},
  author        = {Geunwoo Kim and Pierre Baldi and Stephen McAleer},
  year          = {2023},
  month         = {11},
  eprint        = {2303.17491},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2303.17491},
  environments  = {mixed_objectives, implicit_objectives, virtual},
  agents        = {prompting_and_in_context_learning},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@misc{ma2024laser,
  title         = {LASER: LLM Agent with State-Space Exploration for Web Navigation},
  author        = {Kaixin Ma and Hongming Zhang and Hongwei Wang and Xiaoman Pan and Wenhao Yu and Dong Yu},
  year          = {2024},
  month         = {2},
  eprint        = {2309.08172},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2309.08172},
  environments  = {mixed_objectives, implicit_objectives, virtual},
  agents        = {prompting_and_in_context_learning, agents_with_memory},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@misc{sridhar2023hierarchical,
  title         = {Hierarchical Prompting Assists Large Language Model on Web Navigation},
  author        = {Abishek Sridhar and Robert Lo and Frank F. Xu and Hao Zhu and Shuyan Zhou},
  year          = {2023},
  month         = {10},
  eprint        = {2305.14257},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2305.14257},
  environments  = {mixed_objectives, implicit_objectives, virtual},
  agents        = {prompting_and_in_context_learning, agents_with_memory},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@inproceedings{zheng2024synapse,
  title        = {Synapse: Trajectory-as-Exemplar Prompting with Memory for Computer Control},
  author       = {Longtao Zheng and Rundong Wang and Xinrun Wang and Bo An},
  booktitle    = {The Twelfth International Conference on Learning Representations},
  year         = {2024},
  month        = {1},
  url          = {https://openreview.net/forum?id=Pc8AU1aF5e},
  environments = {mixed_objectives, implicit_objectives, virtual},
  agents       = {prompting_and_in_context_learning, agents_with_memory},
  evaluation   = {rule_based},
  other        = {simulated_humans},
}

@inproceedings{sun2023adaplanner,
  title        = {AdaPlanner: Adaptive Planning from Feedback with Language Models},
  author       = {Haotian Sun and Yuchen Zhuang and Lingkai Kong and Bo Dai and Chao Zhang},
  booktitle    = {Thirty-seventh Conference on Neural Information Processing Systems},
  year         = {2023},
  month        = {11},
  url          = {https://openreview.net/forum?id=rnKgbKmelt},
  environments = {mixed_objectives, implicit_objectives, text},
  agents       = {prompting_and_in_context_learning, agents_with_memory},
  evaluation   = {rule_based},
  other        = {simulated_humans},
}

@misc{wu2023spring,
  title         = {SPRING: Studying the Paper and Reasoning to Play Games},
  author        = {Yue Wu and Shrimai Prabhumoye and So Yeon Min and Yonatan Bisk and Ruslan Salakhutdinov and Amos Azaria and Tom Mitchell and Yuanzhi Li},
  year          = {2023},
  month         = {5},
  eprint        = {2305.15486},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2305.15486},
  environments  = {mixed_objectives, implicit_objectives, text},
  agents        = {prompting_and_in_context_learning, agents_with_memory},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@misc{nair2023dera,
  title         = {DERA: Enhancing Large Language Model Completions with Dialog-Enabled Resolving Agents},
  author        = {Varun Nair and Elliot Schumacher and Geoffrey Tso and Anitha Kannan},
  year          = {2023},
  month         = {3},
  eprint        = {2303.17071},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2303.17071},
  environments  = {collaboration, mixed_objectives, implicit_objectives, text},
  agents        = {prompting_and_in_context_learning, agent_teams, agents_with_memory},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@misc{fu2023improving,
      title={Improving Language Model Negotiation with Self-Play and In-Context Learning from AI Feedback},
      author={Yao Fu and Hao Peng and Tushar Khot and Mirella Lapata},
      year={2023},
      month={5},
      environments = {mixed_objectives, text},
      agents = {prompting_and_in_context_learning, two_agents},
      evaluation = {rule_based, human},
      other = {simulated_humans},
      eprint={2305.10142},
      url = {https://arxiv.org/abs/2305.10142},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

@misc{croissant2023appraisalbased,
      title={An Appraisal-Based Chain-Of-Emotion Architecture for Affective Language Model Game Agents},
      author={Maximilian Croissant and Madeleine Frister and Guy Schofield and Cade McCall},
      year={2023},
      month={9},
      environments = {mixed_objectives, text},
      agents = {prompting_and_in_context_learning, two_agents, agents_with_memory, agents_with_personas},
      evaluation = {rule_based, human},
      other = {human_agent},
      url = {https://arxiv.org/abs/2309.05076},
      eprint={2309.05076},
      archivePrefix={arXiv},
      primaryClass={cs.AI}
}

@misc{wang2023jarvis1,
      title={JARVIS-1: Open-World Multi-task Agents with Memory-Augmented Multimodal Language Models},
      author={Zihao Wang and Shaofei Cai and Anji Liu and Yonggang Jin and Jinbing Hou and Bowei Zhang and Haowei Lin and Zhaofeng He and Zilong Zheng and Yaodong Yang and Xiaojian Ma and Yitao Liang},
      year={2023},
      month={11},
      environments = {embodied},
      agents = {prompting_and_in_context_learning, two_agents, agents_with_memory},
      evaluation = {rule_based},
      other = {human_agent},
      url = {https://arxiv.org/abs/2311.05997},
      eprint={2311.05997},
      archivePrefix={arXiv},
      primaryClass={cs.AI}
}

@misc{jinxin2023cgmi,
      title={CGMI: Configurable General Multi-Agent Interaction Framework},
      author={Shi Jinxin and Zhao Jiabao and Wang Yilei and Wu Xingjiao and Li Jiawen and He Liang},
      year={2023},
      month={8},
      environments = {mixed_objectives, text},
      agents = {prompting_and_in_context_learning, more_than_three_agents, agents_with_memory, agents_with_personas},
      evaluation = {human, rule_based},
      other = {simulated_humans},
      url = {https://arxiv.org/abs/2308.12503},
      eprint={2308.12503},
      archivePrefix={arXiv},
      primaryClass={cs.AI}
}

@article{Zhang2024,
  title        = {ProAgent: Building Proactive Cooperative Agents with Large Language Models},
  author       = {Zhang, Ceyao and Yang, Kaijie and Hu, Siyi and Wang, Zihao and Li, Guanghe and Sun, Yihang and Zhang, Cheng and Zhang, Zhaowei and Liu, Anji and Zhu, Song-Chun and Chang, Xiaojun and Zhang, Junge and Yin, Feng and Liang, Yitao and Yang, Yaodong},
  journal      = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume       = {38},
  number       = {16},
  pages        = {17591--17599},
  year         = {2024},
  month        = {3},
  doi          = {10.1609/aaai.v38i16.29710},
  url          = {https://ojs.aaai.org/index.php/AAAI/article/view/29710},
  environments = {mixed_objectives, embodied},
  agents       = {prompting_and_in_context_learning, agent_teams, agents_with_memory},
  evaluation   = {qualitative, rule_based},
  other        = {n/a}
}

#### Finetuning
@article{modeling/finetuning,
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
  author = {specical entry}
}

@inproceedings{lai2023werewolf,
  title        = {Werewolf among us: Multimodal resources for modeling persuasion behaviors in social deduction games},
  author       = {Lai, Bolin and Zhang, Hongxin and Liu, Miao and Pariani, Aryan and Ryan, Fiona and Jia, Wenqi and Hayati, Shirley Anugrah and Rehg, James and Yang, Diyi},
  booktitle    = {Findings of the Association for Computational Linguistics: ACL 2023},
  pages        = {6570--6588},
  year         = {2023},
  month        = {7},
  url          = {https://aclanthology.org/2023.findings-acl.411/},
  environments = {virtual, collaboration, competition, mixed_objectives},
  agents       = {finetuning},
  evaluation   = {rule_based, human},
  other        = {human_agent},
}

@article{song2024trial,
  title        = {Trial and Error: Exploration-Based Trajectory Optimization for LLM Agents},
  author       = {Song, Yifan and Yin, Da and Yue, Xiang and Huang, Jie and Li, Sujian and Lin, Bill Yuchen},
  journal      = {arXiv preprint arXiv:2403.02502},
  year         = {2024},
  month        = {3},
  url          = {https://arxiv.org/abs/2403.02502},
  environments = {virtual},
  agents       = {finetuning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{lai2024autowebglm,
  title        = {AutoWebGLM: Bootstrap And Reinforce A Large Language Model-based Web Navigating Agent},
  author       = {Lai, Hanyu and Liu, Xiao and Iong, Iat Long and Yao, Shuntian and Chen, Yuxuan and Shen, Pengbo and Yu, Hao and Zhang, Hanchen and Zhang, Xiaohan and Dong, Yuxiao and others},
  journal      = {arXiv preprint arXiv:2404.03648},
  year         = {2024},
  month        = {4},
  url          = {https://arxiv.org/abs/2404.03648},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning, pretraining, finetuning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{chen2024agent,
  title        = {Agent-FLAN: Designing Data and Methods of Effective Agent Tuning for Large Language Models},
  author       = {Chen, Zehui and Liu, Kuikun and Wang, Qiuchen and Zhang, Wenwei and Liu, Jiangning and Lin, Dahua and Chen, Kai and Zhao, Feng},
  journal      = {arXiv preprint arXiv:2403.12881},
  year         = {2024},
  month        = {3},
  url          = {https://arxiv.org/abs/2403.12881},
  environments = {virtual},
  agents       = {finetuning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@misc{gur2023understanding,
  title         = {Understanding HTML with Large Language Models},
  author        = {Izzeddin Gur and Ofir Nachum and Yingjie Miao and Mustafa Safdari and Austin Huang and Aakanksha Chowdhery and Sharan Narang and Noah Fiedel and Aleksandra Faust},
  url           = {https://arxiv.org/abs/2210.03945},
  year          = {2023},
  month         = {5},
  eprint        = {2210.03945},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  environments  = {mixed_objectives, implicit_objectives, virtual},
  agents        = {prompting_and_in_context_learning, finetuning},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@inproceedings{furuta2023instructionfinetuned,
  title        = {Instruction-Finetuned Foundation Models for Multimodal Web Navigation},
  author       = {Hiroki Furuta and Ofir Nachum and Kuang-Huei Lee and Yutaka Matsuo and Shixiang Shane Gu and Izzeddin Gur},
  booktitle    = {ICLR 2023 Workshop on Mathematical and Empirical Understanding of Foundation Models},
  url          = {https://openreview.net/forum?id=oLc9sGOBbc},
  year         = {2023},
  month        = {5},
  environments = {mixed_objectives, implicit_objectives, virtual},
  agents       = {prompting_and_in_context_learning, finetuning, agents_with_memory},
  evaluation   = {rule_based},
  other        = {simulated_humans},
}

@misc{yao2023react,
  title         = {ReAct: Synergizing Reasoning and Acting in Language Models},
  author        = {Shunyu Yao and Jeffrey Zhao and Dian Yu and Nan Du and Izhak Shafran and Karthik Narasimhan and Yuan Cao},
  url           = {https://arxiv.org/abs/2210.03629},
  year          = {2023},
  month         = {10},
  eprint        = {2210.03629},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  environments  = {mixed_objectives, implicit_objectives, virtual},
  agents        = {prompting_and_in_context_learning, finetuning, agents_with_memory},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@inproceedings{gur2024a,
  title        = {A Real-World WebAgent with Planning, Long Context Understanding, and Program Synthesis},
  author       = {Izzeddin Gur and Hiroki Furuta and Austin V Huang and Mustafa Safdari and Yutaka Matsuo and Douglas Eck and Aleksandra Faust},
  booktitle    = {The Twelfth International Conference on Learning Representations},
  url          = {https://openreview.net/forum?id=9JQtrumvg8},
  year         = {2024},
  month        = {1},
  environments = {mixed_objectives, implicit_objectives, virtual},
  agents       = {prompting_and_in_context_learning, finetuning, agents_with_memory},
  evaluation   = {rule_based},
  other        = {simulated_humans},
}

@inproceedings{shaw2023from,
  title        = {From Pixels to {UI} Actions: Learning to Follow Instructions via Graphical User Interfaces},
  author       = {Peter Shaw and Mandar Joshi and James Cohan and Jonathan Berant and Panupong Pasupat and Hexiang Hu and Urvashi Khandelwal and Kenton Lee and Kristina Toutanova},
  booktitle    = {Thirty-seventh Conference on Neural Information Processing Systems},
  url          = {https://openreview.net/forum?id=3PjCt4kmRx},
  year         = {2023},
  month        = {11},
  environments = {mixed_objectives, implicit_objectives, virtual},
  agents       = {prompting_and_in_context_learning, finetuning, agents_with_memory},
  evaluation   = {rule_based},
  other        = {simulated_humans},
}

@misc{zheng2024gpt4vision,
  title         = {GPT-4V(ision) is a Generalist Web Agent, if Grounded},
  author        = {Boyuan Zheng and Boyu Gou and Jihyung Kil and Huan Sun and Yu Su},
  url           = {https://arxiv.org/abs/2401.01614},
  year          = {2024},
  month         = {1},
  eprint        = {2401.01614},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  environments  = {mixed_objectives, implicit_objectives, virtual},
  agents        = {prompting_and_in_context_learning, finetuning, agents_with_memory},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@misc{kil2024dualview,
  title         = {Dual-View Visual Contextualization for Web Navigation},
  author        = {Jihyung Kil and Chan Hee Song and Boyuan Zheng and Xiang Deng and Yu Su and Wei-Lun Chao},
  url           = {https://arxiv.org/abs/2402.04476},
  year          = {2024},
  month         = {2},
  eprint        = {2402.04476},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  environments  = {mixed_objectives, implicit_objectives, virtual},
  agents        = {prompting_and_in_context_learning, finetuning, agents_with_memory},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@misc{yildirim2024learning,
  title         = {Learning Social Navigation from Demonstrations with Deep Neural Networks},
  author        = {Yigit Yildirim and Emre Ugur},
  url           = {https://arxiv.org/abs/2404.11246},
  year          = {2024},
  month         = {4},
  eprint        = {2404.11246},
  archivePrefix = {arXiv},
  primaryClass  = {cs.RO},
  environments  = {collaboration, robotics},
  agents        = {finetuning, two_agents, more_than_three_agents},
  evaluation    = {rule_based, human},
  other         = {human_agent},
}

@article{karnan2022scand,
  title        = {Socially CompliAnt Navigation Dataset (SCAND): A Large-Scale Dataset Of Demonstrations For Social Navigation},
  author       = {Karnan, Haresh and Nair, Anirudh and Xiao, Xuesu and Warnell, Garrett and Pirk, S{\"o}ren and Toshev, Alexander and Hart, Justin and Biswas, Joydeep and Stone, Peter},
  journal      = {IEEE Robotics and Automation Letters},
  publisher    = {IEEE},
  url          = {https://www.cs.utexas.edu/~xiao/SCAND/SCAND.html},
  year         = {2022},
  month        = {10},
  environments = {collaboration, robotics},
  agents       = {finetuning, two_agents},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@inproceedings{9561973,
  title        = {Learning World Transition Model for Socially Aware Robot Navigation},
  author       = {Cui, Yuxiang and Zhang, Haodong and Wang, Yue and Xiong, Rong},
  booktitle    = {2021 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {9262--9268},
  url          = {https://ieeexplore.ieee.org/document/9561973},
  year         = {2021},
  month        = {5},
  environments = {collaboration, embodied},
  agents       = {finetuning, two_agents},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@misc{zhang2022danli,
  title         = {DANLI: Deliberative Agent for Following Natural Language Instructions},
  author        = {Yichi Zhang and Jianing Yang and Jiayi Pan and Shane Storks and Nikhil Devraj and Ziqiao Ma and Keunwoo Peter Yu and Yuwei Bao and Joyce Chai},
  url           = {https://arxiv.org/abs/2210.12485},
  year          = {2022},
  month         = {10},
  eprint        = {2210.12485},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  environments  = {collaboration, embodied},
  agents        = {finetuning, two_agents},
  evaluation    = {rule_based},
  other         = {human_agent},
}

@misc{kim2024contextaware,
  title         = {Context-Aware Planning and Environment-Aware Memory for Instruction Following Embodied Agents},
  author        = {Byeonghwi Kim and Jinyeon Kim and Yuyeong Kim and Cheolhong Min and Jonghyun Choi},
  url           = {https://arxiv.org/abs/2308.07241},
  year          = {2024},
  month         = {8},
  eprint        = {2308.07241},
  archivePrefix = {arXiv},
  primaryClass  = {cs.RO},
  environments  = {collaboration, embodied},
  agents        = {finetuning, two_agents},
  evaluation    = {rule_based},
  other         = {human_agent},
}

@misc{wan2023handmethat,
  title         = {HandMeThat: Human-Robot Communication in Physical and Social Environments},
  author        = {Yanming Wan and Jiayuan Mao and Joshua B. Tenenbaum},
  url           = {https://arxiv.org/abs/2310.03779},
  year          = {2023},
  month         = {10},
  eprint        = {2310.03779},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  environments  = {collaboration, embodied},
  agents        = {finetuning, two_agents},
  evaluation    = {rule_based},
  other         = {human_agent},
}

@article{gao2022dialfred,
  title        = {Dialfred: Dialogue-enabled agents for embodied instruction following},
  author       = {Gao, Xiaofeng and Gao, Qiaozi and Gong, Ran and Lin, Kaixiang and Thattai, Govind and Sukhatme, Gaurav S},
  journal      = {IEEE Robotics and Automation Letters},
  volume       = {7},
  number       = {4},
  pages        = {10049--10056},
  publisher    = {IEEE},
  url          = {https://ieeexplore.ieee.org/document/9837390},
  year         = {2022},
  month        = {7},
  environments = {collaboration, embodied},
  agents       = {finetuning, two_agents},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@article{zhan2018generative,
  title        = {Generative multi-agent behavioral cloning},
  author       = {Zhan, Eric and Zheng, Stephan and Yue, Yisong and Lucey, Patrick},
  url          = {https://arxiv.org/abs/1803.07612},
  journal      = {arXiv preprint arXiv:1803.07612},
  year         = {2018},
  month        = {3},
  environments = {collaboration, embodied},
  agents       = {finetuning, two_agents, more_than_three_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{song2018multi,
  title        = {Multi-agent generative adversarial imitation learning},
  author       = {Song, Jiaming and Ren, Hongyu and Sadigh, Dorsa and Ermon, Stefano},
  journal      = {Advances in neural information processing systems},
  volume       = {31},
  url          = {https://proceedings.neurips.cc/paper/2018/hash/240c945bb72980130446fc2b40fbb8e0-Abstract.html},
  year         = {2018},
  month        = {12},
  environments = {collaboration},
  agents       = {finetuning, two_agents, more_than_three_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{meng2023offline,
  title        = {Offline pre-trained multi-agent decision transformer},
  author       = {Meng, Linghui and Wen, Muning and Le, Chenyang and Li, Xiyun and Xing, Dengpeng and Zhang, Weinan and Wen, Ying and Zhang, Haifeng and Wang, Jun and Yang, Yaodong and others},
  journal      = {Machine Intelligence Research},
  volume       = {20},
  number       = {2},
  pages        = {233--248},
  publisher    = {Springer},
  url          = {https://link.springer.com/article/10.1007/s11633-022-1383-7},
  year         = {2023},
  month        = {3},
  environments = {collaboration, competition, mixed_objectives, embodied},
  agents       = {finetuning, more_than_three_agents, agent_teams},
  evaluation   = {rule_based},
  other        = {n/a},
}

@misc{ding2024entgpt,
  title         = {EntGPT: Linking Generative Large Language Models with Knowledge Bases},
  author        = {Yifan Ding and Amrit Poudel and Qingkai Zeng and Tim Weninger and Balaji Veeramani and Sanmitra Bhattacharya},
  url           = {https://arxiv.org/abs/2402.06738},
  year          = {2024},
  month         = {2},
  eprint        = {2402.06738},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  environments  = {text},
  agents        = {pretraining, finetuning},
  evaluation    = {model_based},
  other         = {n/a},
}

@article{yu2024self,
  title        = {Self-chained image-language model for video localization and question answering},
  author       = {Yu, Shoubin and Cho, Jaemin and Yadav, Prateek and Bansal, Mohit},
  journal      = {Advances in Neural Information Processing Systems},
  volume       = {36},
  url          = {https://proceedings.neurips.cc/paper_files/paper/2023/file/f22a9af8dbb348952b08bd58d4734b50-Paper-Conference.pdf},
  year         = {2023},
  month        = {12},
  environments = {text, virtual},
  agents       = {pretraining, finetuning},
  evaluation   = {model_based},
  other        = {n/a},
}

@misc{li2024llms,
  title         = {LLMs Meet Long Video: Advancing Long Video Comprehension with An Interactive Visual Adapter in LLMs},
  author        = {Yunxin Li and Xinyu Chen and Baotian Hu and Min Zhang},
  url           = {https://arxiv.org/abs/2402.13546},
  year          = {2024},
  month         = {2},
  eprint        = {2402.13546},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  environments  = {text, virtual},
  agents        = {pretraining, finetuning},
  evaluation    = {model_based},
  other         = {n/a},
}

@inproceedings{Xie_2023_ICCV,
  title        = {Multi-Modal Correlated Network with Emotional Reasoning Knowledge for Social Intelligence Question-Answering},
  author       = {Xie, Baijun and Park, Chung Hyuk},
  booktitle    = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  pages        = {3075--3081},
  url          = {https://openaccess.thecvf.com/content/ICCV2023W/ASI/papers/Xie_Multi-Modal_Correlated_Network_with_Emotional_Reasoning_Knowledge_for_Social_Intelligence_ICCVW_2023_paper.pdf},
  year         = {2023},
  month        = {10},
  environments = {text, virtual},
  agents       = {pretraining, finetuning},
  evaluation   = {model_based},
  other        = {n/a},
}


#### Reinforcement learning
@article{modeling/reinforcement-learning,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}


@article{xu2023language,
  title        = {Language agents with reinforcement learning for strategic play in the werewolf game},
  author       = {Xu, Zelai and Yu, Chao and Fang, Fei and Wang, Yu and Wu, Yi},
  url          = {https://arxiv.org/abs/2310.18940},
  journal      = {arXiv preprint arXiv:2310.18940},
  year         = {2023},
  month        = {10},
  environments = {collaboration, competition, mixed_objectives, text},
  agents       = {reinforcement_learning, more_than_three_agents, agent_teams},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{yu2023asynchronous,
  title        = {Asynchronous Multi-Agent Reinforcement Learning for Efficient Real-Time Multi-Robot Cooperative Exploration},
  author       = {Yu, Chao and Yang, Xinyi and Gao, Jiaxuan and Chen, Jiayu and Li, Yunfei and Liu, Jijia and Xiang, Yunfei and Huang, Ruixin and Yang, Huazhong and Wu, Yi and others},
  booktitle    = {Proceedings of the 2023 International Conference on Autonomous Agents and Multiagent Systems},
  pages        = {1107--1115},
  url          = {https://dl.acm.org/doi/abs/10.5555/3545946.3598752},
  year         = {2023},
  month        = {5},
  environments = {collaboration, competition, mixed_objectives, robotics},
  agents       = {reinforcement_learning, more_than_three_agents, agent_teams},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{charakorn2020investigating,
  title        = {Investigating partner diversification methods in cooperative multi-agent deep reinforcement learning},
  author       = {Charakorn, Rujikorn and Manoonpong, Poramate and Dilokthanakul, Nat},
  booktitle    = {Neural Information Processing: 27th International Conference, ICONIP 2020, Bangkok, Thailand, November 18--22, 2020, Proceedings, Part V 27},
  pages        = {395--402},
  organization = {Springer},
  url          = {https://link.springer.com/chapter/10.1007/978-3-030-63823-8_46},
  year         = {2020},
  month        = {11},
  environments = {collaboration, virtual},
  agents       = {reinforcement_learning, two_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}


@article{pan2024autonomous,
  title        = {Autonomous Evaluation and Refinement of Digital Agents},
  author       = {Pan, Jiayi and Zhang, Yichi and Tomlin, Nicholas and Zhou, Yifei and Levine, Sergey and Suhr, Alane},
  url          = {https://arxiv.org/abs/2404.06474},
  journal      = {arXiv preprint arXiv:2404.06474},
  year         = {2024},
  month        = {4},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning, finetuning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{van2022language,
  title        = {Language games meet multi-agent reinforcement learning: A case study for the naming game},
  author       = {Van Eecke, Paul and Beuls, Katrien and Botoko Ekila, J{\'e}r{\^o}me and R{\u{a}}dulescu, Roxana},
  journal      = {Journal of Language Evolution},
  volume       = {7},
  number       = {2},
  pages        = {213--223},
  publisher    = {Oxford University Press UK},
  url          = {https://academic.oup.com/jole/article/7/2/213/7128304},
  year         = {2022},
  month        = {4},
  environments = {collaboration, competition, mixed_objectives},
  agents       = {reinforcement_learning, two_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{zhu2022language,
  title        = {Language Learning from Communicative Goals and Linguistic Input},
  author       = {Zhu, Hao and Bisk, Yonatan and Neubig, Graham},
  booktitle    = {Proceedings of the Annual Meeting of the Cognitive Science Society},
  volume       = {44},
  number       = {44},
  url          = {https://escholarship.org/uc/item/7p65n371},
  year         = {2022},
  month        = {7},
  environments = {collaboration, virtual},
  agents       = {reinforcement_learning, two_agents},
  evaluation   = {rule_based},
  other        = {simulated_humans},
}

@misc{wang2024sotopiapi,
  title         = {SOTOPIA-$\pi$: Interactive Learning of Socially Intelligent Language Agents},
  author        = {Ruiyi Wang and Haofei Yu and Wenxin Zhang and Zhengyang Qi and Maarten Sap and Graham Neubig and Yonatan Bisk and Hao Zhu},
  url           = {https://arxiv.org/abs/2403.08715},
  year          = {2024},
  month         = {3},
  eprint        = {2403.08715},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  environments  = {collaboration, competition, mixed_objectives, text},
  agents        = {reinforcement_learning, two_agents},
  evaluation    = {rule_based, human, model_based},
  other         = {simulated_humans},
}

@misc{liu2023computational,
  title         = {Computational Language Acquisition with Theory of Mind},
  author        = {Andy Liu and Hao Zhu and Emmy Liu and Yonatan Bisk and Graham Neubig},
  url           = {https://arxiv.org/abs/2303.01502},
  year          = {2023},
  month         = {3},
  eprint        = {2303.01502},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  environments  = {collaboration, virtual},
  agents        = {reinforcement_learning, two_agents},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}


@article{zhe2024indentifying,
  title         = {TacticAI: an AI assistant for football tactics},
  author        = {Wang, Zhe and Veli{\v c}kovi{\'c}, Petar and Hennes, Daniel and Toma{\v s}ev, Nenad and Prince, Laurel and Kaisers, Michael and Bachrach, Yoram and Elie, Romuald and Wenliang, Li Kevin and Piccinini, Federico and Spearman, William and Graham, Ian and Connor, Jerome and Yang, Yi and Recasens, Adri{\`a} and Khan, Mina and Beauguerlange, Nathalie and Sprechmann, Pablo and Moreno, Pol and Heess, Nicolas and Bowling, Michael and Hassabis, Demis and Tuyls, Karl},
  journal       = {Nature Communications},
  volume        = {15},
  number        = {1},
  pages         = {1906},
  year          = {2024},
  month         = {3},
  date          = {2024-03-19},
  doi           = {10.1038/s41467-024-45965-x},
  issn          = {2041-1723},
  url           = {https://doi.org/10.1038/s41467-024-45965-x},
  abstract      = {Identifying key patterns of tactics implemented by rival teams, and developing effective responses, lies at the heart of modern football. However, doing so algorithmically remains an open research challenge. To address this unmet need, we propose TacticAI, an AI football tactics assistant developed and evaluated in close collaboration with domain experts from Liverpool FC. We focus on analysing corner kicks, as they offer coaches the most direct opportunities for interventions and improvements. TacticAI incorporates both a predictive and a generative component, allowing the coaches to effectively sample and explore alternative player setups for each corner kick routine and to select those with the highest predicted likelihood of success. We validate TacticAI on a number of relevant benchmark tasks: predicting receivers and shot attempts and recommending player position adjustments. The utility of TacticAI is validated by a qualitative study conducted with football domain experts at Liverpool FC. We show that TacticAI's model suggestions are not only indistinguishable from real tactics, but also favoured over existing tactics 90{\%} of the time, and that TacticAI offers an effective corner kick retrieval system. TacticAI achieves these results despite the limited availability of gold-standard data, achieving data efficiency through geometric deep learning.},
  id            = {Wang2024},
  date-added    = {2024-04-19 16:43:21 -0400},
  date-modified = {2024-04-19 16:43:21 -0400},
  bdsk-url-1    = {https://doi.org/10.1038/s41467-024-45965-x},
  environments  = {virtual, collaboration, competition, mixed_objectives},
  agents        = {reinforcement_learning, agent_teams},
  evaluation    = {rule_based},
  other         = {n/a},
}

@article{silver2017mastering,
  title        = {Mastering the game of {Go} without human knowledge},
  author       = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and others},
  journal      = {Nature},
  volume       = {550},
  number       = {7676},
  pages        = {354--359},
  publisher    = {Nature Publishing Group},
  url          = {https://www.nature.com/articles/nature24270},
  year         = {2017},
  month        = {10},
  environments = {competition, virtual},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@article{silver2016mastering,
  title        = {Mastering the game of {Go} with deep neural networks and tree search},
  author       = {Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and Van Den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
  journal      = {Nature},
  volume       = {529},
  number       = {7587},
  pages        = {484--489},
  publisher    = {Nature Publishing Group},
  url          = {https://www.nature.com/articles/nature16961},
  year         = {2016},
  month        = {1},
  environments = {competition, virtual},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@inproceedings{kharitonov-etal-2019-egg,
    title = "{EGG}: a toolkit for research on Emergence of lan{G}uage in Games",
    author = "Kharitonov, Eugene  and
      Chaabouni, Rahma  and
      Bouchacourt, Diane  and
      Baroni, Marco",
    editor = "Pad{\'o}, Sebastian  and
      Huang, Ruihong",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations",
    month = {11},
    year = "2019",
    address = "Hong Kong, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/D19-3010",
    doi = "10.18653/v1/D19-3010",
    pages = "55--60",
    abstract = "There is renewed interest in simulating language emergence among deep neural agents that communicate to jointly solve a task, spurred by the practical aim to develop language-enabled interactive AIs, as well as by theoretical questions about the evolution of human language. However, optimizing deep architectures connected by a discrete communication channel (such as that in which language emerges) is technically challenging. We introduce EGG, a toolkit that greatly simplifies the implementation of emergent-language communication games. EGG{'}s modular design provides a set of building blocks that the user can combine to create new games, easily navigating the optimization and architecture space. We hope that the tool will lower the technical barrier, and encourage researchers from various backgrounds to do original work in this exciting area.",
    environments = {collaboration, text},
    agents = {reinforcement_learning, two_agents},
    evaluation = {rule_based},
    other = {n/a}
}

@inproceedings{cao2018emergent,
  title        = {Emergent Communication through Negotiation},
  author       = {Kris Cao and Angeliki Lazaridou and Marc Lanctot and Joel Z Leibo and Karl Tuyls and Stephen Clark},
  booktitle    = {International Conference on Learning Representations},
  url          = {https://openreview.net/forum?id=Hk6WhagRW},
  year         = {2018},
  month        = {2},
  environments = {mixed_objectives, text},
  agents       = {reinforcement_learning, two_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{10.1145/3269206.3272021,
  author = {Jin, Junqi and Song, Chengru and Li, Han and Gai, Kun and Wang, Jun and Zhang, Weinan},
  title = {Real-Time Bidding with Multi-Agent Reinforcement Learning in Display Advertising},
  year = {2018},
  publisher = {Association for Computing Machinery},
  url = {https://doi.org/10.1145/3269206.3272021},
  doi = {10.1145/3269206.3272021},
  abstract = {Real-time advertising allows advertisers to bid for each impression for a visiting user. To optimize specific goals such as maximizing revenue and return on investment (ROI) led by ad placements, advertisers not only need to estimate the relevance between the ads and user's interests, but most importantly require a strategic response with respect to other advertisers bidding in the market. In this paper, we formulate bidding optimization with multi-agent reinforcement learning. To deal with a large number of advertisers, we propose a clustering method and assign each cluster with a strategic bidding agent. A practical Distributed Coordinated Multi-Agent Bidding (DCMAB) has been proposed and implemented to balance the tradeoff between the competition and cooperation among advertisers. The empirical study on our industry-scaled real-world data has demonstrated the effectiveness of our methods. Our results show cluster-based bidding would largely outperform single-agent and bandit approaches, and the coordinated bidding achieves better overall objectives than purely self-interested bidding agents.},
  booktitle = {Proceedings of the 27th ACM International Conference on Information and Knowledge Management},
  pages = {2193--2201},
  numpages = {9},
  month = {10},
  environments = {competition, virtual},
  agents = {reinforcement_learning, more_than_three_agents},
  evaluation = {rule_based},
  other = {n/a}
}

@inproceedings{branavan2010reading,
  title        = {Reading between the lines: Learning to map high-level instructions to commands},
  author       = {Branavan, SRK and Zettlemoyer, Luke and Barzilay, Regina},
  booktitle    = {Proceedings of the 48th annual meeting of the association for computational linguistics},
  pages        = {1268--1277},
  url          = {https://www.aclweb.org/anthology/P10-1129},
  year         = {2010},
  month        = {7},
  environments = {virtual},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

#### Pretraining

@inproceedings{wilf2023face,
  title        = {Face-to-face contrastive learning for social intelligence question-answering},
  author       = {Wilf, Alex and Ma, Martin Q and Liang, Paul Pu and Zadeh, Amir and Morency, Louis-Philippe},
  booktitle    = {2023 IEEE 17th International Conference on Automatic Face and Gesture Recognition (FG)},
  pages        = {1--7},
  organization = {IEEE},
  url          = {https://arxiv.org/abs/2208.01036},
  year         = {2023},
  month        = {5},
  environments = {text, virtual},
  agents       = {pretraining, finetuning},
  evaluation   = {model_based},
  other        = {n/a},
}

@inproceedings{wilf-etal-2023-difference,
    title = "Difference-Masking: Choosing What to Mask in Continued Pretraining",
    author = "Wilf, Alex  and
      Akter, Syeda  and
      Mathur, Leena  and
      Liang, Paul  and
      Mathew, Sheryl  and
      Shou, Mengrou  and
      Nyberg, Eric  and
      Morency, Louis-Philippe",
    editor = "Bouamor, Houda  and
      Pino, Juan  and
      Bali, Kalika",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
    month = {12},
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.findings-emnlp.881",
    doi = "10.18653/v1/2023.findings-emnlp.881",
    pages = "13222--13234",
    abstract = "The self-supervised objective of masked prediction has led to promising performance gains on a variety of downstream tasks. However, while most approaches randomly mask tokens, there is strong intuition that deciding what to mask can substantially improve learning outcomes. We investigate this in continued pretraining setting in which pretrained models continue to pretrain on domain-specific data before performing some downstream task. We introduce Difference-Masking, a masking strategy that automatically chooses what to mask during continued pretraining by considering what makes a task domain different from the pretraining domain. Empirically, we find that Difference-Masking outperforms baselines on continued pretraining settings across four diverse language-only and multimodal video tasks.",
    environments = {text, virtual},
    agents = {pretraining, finetuning},
    evaluation = {model_based},
    other = {n/a},
}

@inproceedings{chang-etal-2020-incorporating,
    title = "Incorporating Commonsense Knowledge Graph in Pretrained Models for Social Commonsense Tasks",
    author = "Chang, Ting-Yun  and
      Liu, Yang  and
      Gopalakrishnan, Karthik  and
      Hedayatnia, Behnam  and
      Zhou, Pei  and
      Hakkani-Tur, Dilek",
    editor = "Agirre, Eneko  and
      Apidianaki, Marianna  and
      Vuli{\'c}, Ivan",
    booktitle = "Proceedings of Deep Learning Inside Out (DeeLIO): The First Workshop on Knowledge Extraction and Integration for Deep Learning Architectures",
    month = {11},
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.deelio-1.9",
    doi = "10.18653/v1/2020.deelio-1.9",
    pages = "74--79",
    abstract = "Pretrained language models have excelled at many NLP tasks recently; however, their social intelligence is still unsatisfactory. To enable this, machines need to have a more general understanding of our complicated world and develop the ability to perform commonsense reasoning besides fitting the specific downstream tasks. External commonsense knowledge graphs (KGs), such as ConceptNet, provide rich information about words and their relationships. Thus, towards general commonsense learning, we propose two approaches to implicitly and explicitly infuse such KGs into pretrained language models. We demonstrate our proposed methods perform well on SocialIQA, a social commonsense reasoning task, in both limited and full training data regimes.",
    environments = {text, virtual},
    agents = {pretraining, finetuning},
    evaluation = {model_based},
    other = {n/a},
}


@inproceedings{kim-etal-2023-examining,
    title = "Examining Consistency of Visual Commonsense Reasoning based on Person Grounding",
    author = "Kim, Huiju  and
      Kang, Youjin  and
      Lee, SangKeun",
    editor = "Park, Jong C.  and
      Arase, Yuki  and
      Hu, Baotian  and
      Lu, Wei  and
      Wijaya, Derry  and
      Purwarianti, Ayu  and
      Krisnadhi, Adila Alfa",
    booktitle = "Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = {11},
    year = "2023",
    address = "Nusa Dua, Bali",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.ijcnlp-main.66",
    doi = "10.18653/v1/2023.ijcnlp-main.66",
    pages = "1026--1039",
    environments = {text, virtual},
    agents = {pretraining, finetuning},
    evaluation = {model_based},
    other = {n/a},
}

@misc{ma2024eventlens,
  title         = {EventLens: Leveraging Event-Aware Pretraining and Cross-modal Linking Enhances Visual Commonsense Reasoning},
  author        = {Mingjie Ma and Zhihuan Yu and Yichao Ma and Guohui Li},
  url           = {https://arxiv.org/abs/2404.13847},
  year          = {2024},
  month         = {4},
  eprint        = {2404.13847},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  environments  = {text, virtual},
  agents        = {pretraining, finetuning},
  evaluation    = {model_based},
  other         = {n/a},
}


@misc{sukhbaatar2024branchtrainmix,
  title         = {Branch-Train-MiX: Mixing Expert LLMs into a Mixture-of-Experts LLM},
  author        = {Sainbayar Sukhbaatar and Olga Golovneva and Vasu Sharma and Hu Xu and Xi Victoria Lin and Baptiste Rozi{\`e}re and Jacob Kahn and Daniel Li and Wen-tau Yih and Jason Weston and Xian Li},
  url           = {https://arxiv.org/abs/2403.07816},
  year          = {2024},
  month         = {3},
  eprint        = {2403.07816},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  environments  = {text},
  agents        = {pretraining, finetuning},
  evaluation    = {model_based},
  other         = {n/a},
}

### Evaluating social agents

#### Evaluating text social agents
@article{evaluation/language,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@article{srivastava2022beyond,
  title        = {Beyond the imitation game: Quantifying and extrapolating the capabilities of language models},
  author       = {Srivastava, Aarohi and Rastogi, Abhinav and Rao, Abhishek and Shoeb, Abu Awal Md and Abid, Abubakar and Fisch, Adam and Brown, Adam R and Santoro, Adam and Gupta, Aditya and Garriga-Alonso, Adri{\`a} and others},
  journal      = {arXiv preprint arXiv:2206.04615},
  year         = {2022},
  month        = {6},
  url          = {https://arxiv.org/abs/2206.04615},
  environments = {text},
  agents       = {prompting_and_in_context_learning, finetuning, pretraining, reinforcement_learning, two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@inproceedings{finch-choi-2020-towards,
  title        = {Towards Unified Dialogue System Evaluation: A Comprehensive Analysis of Current Evaluation Protocols},
  author       = {Finch, Sarah E. and Choi, Jinho D.},
  editor       = {Pietquin, Olivier and Muresan, Smaranda and Chen, Vivian and Kennington, Casey and Vandyke, David and Dethlefs, Nina and Inoue, Koji and Ekstedt, Erik and Ultes, Stefan},
  booktitle    = {Proceedings of the 21th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  month        = jul,
  year         = {2020},
  address      = {1st virtual meeting},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2020.sigdial-1.29},
  doi          = {10.18653/v1/2020.sigdial-1.29},
  pages        = {236--245},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human, rule_based, model_based},
  other        = {human_agent},
}

@inproceedings{tsuta-etal-2020-ubleu,
  title        = {u{BLEU}: Uncertainty-Aware Automatic Evaluation Method for Open-Domain Dialogue Systems},
  author       = {Tsuta, Yuma and Yoshinaga, Naoki and Toyoda, Masashi},
  editor       = {Rijhwani, Shruti and Liu, Jiangming and Wang, Yizhong and Dror, Rotem},
  booktitle    = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop},
  month        = jul,
  year         = {2020},
  address      = {Online},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2020.acl-srw.27},
  doi          = {10.18653/v1/2020.acl-srw.27},
  pages        = {199--206},
  environments = {text},
  agents       = {n/a},
  evaluation   = {model_based},
  other        = {human_agent},
}

@article{deriu2021survey,
  title        = {Survey on evaluation methods for dialogue systems},
  author       = {Deriu, Jan and Rodrigo, Alvaro and Otegi, Arantxa and Echegoyen, Guillermo and Rosset, Sophie and Agirre, Eneko and Cieliebak, Mark},
  journal      = {Artificial Intelligence Review},
  volume       = {54},
  pages        = {755--810},
  month        = {1},
  year         = {2021},
  publisher    = {Springer},
  url          = {https://link.springer.com/article/10.1007/s10462-020-09866-x},
  environments = {text},
  agents       = {n/a},
  evaluation   = {rule_based, model_based},
  other        = {human_agent},
}

@inproceedings{zhang-etal-2021-dynaeval,
  title        = {{D}yna{E}val: Unifying Turn and Dialogue Level Evaluation},
  author       = {Zhang, Chen and Chen, Yiming and D{'}Haro, Luis Fernando and Zhang, Yan and Friedrichs, Thomas and Lee, Grandee and Li, Haizhou},
  editor       = {Zong, Chengqing and Xia, Fei and Li, Wenjie and Navigli, Roberto},
  booktitle    = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month        = aug,
  year         = {2021},
  address      = {Online},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2021.acl-long.441},
  doi          = {10.18653/v1/2021.acl-long.441},
  pages        = {5676--5689},
  environments = {text},
  agents       = {n/a},
  evaluation   = {model_based},
  other        = {human_agent},
}

@inproceedings{smith-etal-2022-human,
  title        = {Human Evaluation of Conversations is an Open Problem: comparing the sensitivity of various methods for evaluating dialogue agents},
  author       = {Smith, Eric and Hsu, Orion and Qian, Rebecca and Roller, Stephen and Boureau, Y-Lan and Weston, Jason},
  editor       = {Liu, Bing and Papangelis, Alexandros and Ultes, Stefan and Rastogi, Abhinav and Chen, Yun-Nung and Spithourakis, Georgios and Nouri, Elnaz and Shi, Weiyan},
  booktitle    = {Proceedings of the 4th Workshop on NLP for Conversational AI},
  month        = may,
  year         = {2022},
  address      = {Dublin, Ireland},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2022.nlp4convai-1.8},
  doi          = {10.18653/v1/2022.nlp4convai-1.8},
  pages        = {77--97},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human},
  other        = {human_agent},
}

@inproceedings{finch-etal-2023-dont,
  title        = {Don{'}t Forget Your {ABC}{'}s: Evaluating the State-of-the-Art in Chat-Oriented Dialogue Systems},
  author       = {Finch, Sarah E. and Finch, James D. and Choi, Jinho D.},
  editor       = {Rogers, Anna and Boyd-Graber, Jordan and Okazaki, Naoaki},
  booktitle    = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month        = jul,
  year         = {2023},
  address      = {Toronto, Canada},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2023.acl-long.839},
  doi          = {10.18653/v1/2023.acl-long.839},
  pages        = {15044--15071},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human},
  other        = {human_agent},
}

@inproceedings{zhang-etal-2023-xdial,
  title        = {x{D}ial-Eval: A Multilingual Open-Domain Dialogue Evaluation Benchmark},
  author       = {Zhang, Chen and D{'}Haro, Luis and Tang, Chengguang and Shi, Ke and Tang, Guohua and Li, Haizhou},
  editor       = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle    = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  month        = dec,
  year         = {2023},
  address      = {Singapore},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2023.findings-emnlp.371},
  doi          = {10.18653/v1/2023.findings-emnlp.371},
  pages        = {5579--5601},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human},
  other        = {human_agent},
}

@inproceedings{zhou2024sotopia,
  title        = {SOTOPIA: Interactive Evaluation for Social Intelligence in Language Agents},
  author       = {Xuhui Zhou and Hao Zhu and Leena Mathur and Ruohong Zhang and Haofei Yu and Zhengyang Qi and Louis-Philippe Morency and Yonatan Bisk and Daniel Fried and Graham Neubig and Maarten Sap},
  booktitle    = {ICLR},
  year         = {2024},
  month        = {01},
  url          = {https://openreview.net/forum?id=mM7VurbA4r},
  environments = {mixed_objectives, text},
  agents       = {prompting_and_in_context_learning, two_agents},
  evaluation   = {model_based, human},
  other        = {human_agent},
}

@misc{chen2024roleinteract,
      title={RoleInteract: Evaluating the Social Interaction of Role-Playing Agents},
      author={Hongzhan Chen and Hehong Chen and Ming Yan and Wenshen Xu and Xing Gao and Weizhou Shen and Xiaojun Quan and Chenliang Li and Ji Zhang and Fei Huang and Jingren Zhou},
      year={2024},
      month={3},
      environments = {implicit_objectives, text},
      agents = {prompting_and_in_context_learning, more_than_three_agents, agents_with_memory, agents_with_personas},
      evaluation = {rule_based},
      other = {simulated_humans},
      url = {https://arxiv.org/abs/2403.13679},
      eprint={2403.13679},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

@inproceedings{svikhnushina-pu-2023-approximating,
  title        = {Approximating Online Human Evaluation of Social Chatbots with Prompting},
  author       = {Svikhnushina, Ekaterina and Pu, Pearl},
  editor       = {Stoyanchev, Svetlana and Joty, Shafiq and Schlangen, David and Dusek, Ondrej and Kennington, Casey and Alikhani, Malihe},
  booktitle    = {Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  month        = {09},
  year         = {2023},
  address      = {Prague, Czechia},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2023.sigdial-1.25},
  doi          = {10.18653/v1/2023.sigdial-1.25},
  pages        = {268--281},
  abstract     = {With conversational models becoming increasingly available to the general public, developing scalable and robust evaluation metrics is crucial to minimize potential social and psychological risks for the users. Existing evaluation metrics aim to automate offline user evaluation and approximate human judgment of pre-curated dialogs. However, they are limited in their ability to capture subjective perceptions of users who actually interact with the chatbots and might not generalize to real-world settings. To address this limitation, we propose an approach to approximate online human evaluation, leveraging large language models (LLMs) from the GPT-family. We introduce a new Dialog system Evaluation framework based on Prompting (DEP), which enables a fully automatic evaluation pipeline that replicates live user studies and achieves an impressive correlation with human judgment (up to Pearson r=0.95 on a system level). The DEP approach involves collecting synthetic chat logs of evaluated bots with an LLM in the other-play setting, where the LLM is carefully conditioned to follow a specific scenario. We further explore different prompting approaches to produce evaluation scores with the same LLM. The best-performing prompts, which contain few-shot demonstrations and instructions, show outstanding performance on the tested dataset and demonstrate the ability to generalize to other dialog corpora.},
  environments = {mixed_objectives, text},
  agents       = {prompting_and_in_context_learning, two_agents},
  evaluation   = {model_based},
  other        = {n/a},
}

@inproceedings{NEURIPS2023_a3621ee9,
  title        = {CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society},
  author       = {Li, Guohao and Hammoud, Hasan and Itani, Hani and Khizbullin, Dmitrii and Ghanem, Bernard},
  editor       = {A. Oh and T. Neumann and A. Globerson and K. Saenko and M. Hardt and S. Levine},
  booktitle    = {Advances in Neural Information Processing Systems},
  volume       = {36},
  pages        = {51991--52008},
  publisher    = {Curran Associates, Inc.},
  year         = {2023},
  month        = {12},
  url          = {https://proceedings.neurips.cc/paper_files/paper/2023/file/a3621ee907def47c1b952ade25c67698-Paper-Conference.pdf},
  environments = {mixed_objectives, text},
  agents       = {prompting_and_in_context_learning, agent_teams},
  evaluation   = {human, model_based},
  other        = {human_agent},
}

@article{lan2023llm,
      title={LLM-Based Agent Society Investigation: Collaboration and Confrontation in Avalon Gameplay},
      author={Lan, Yihuai and Hu, Zhiqiang and Wang, Lei and Wang, Yang and Ye, Deheng and Zhao, Peilin and Lim, Ee-Peng and Xiong, Hui and Wang, Hao},
      journal={arXiv preprint arXiv:2310.14985},
      eprint={2310.14985},
      archivePrefix={arXiv},
      year={2023},
      month={10},
      environments = {mixed_objectives, text},
      agents = {prompting_and_in_context_learning, more_than_three_agents, agents_with_memory, agents_with_personas},
      evaluation = {model_based, rule_based},
      other = {simulated_humans},
      url = {https://arxiv.org/pdf/2310.14985.pdf}
}

@misc{tu2023characterchat,
  title         = {CharacterChat: Learning towards Conversational AI with Personalized Social Support},
  author        = {Quan Tu and Chuanqi Chen and Jinpeng Li and Yanran Li and Shuo Shang and Dongyan Zhao and Ran Wang and Rui Yan},
  year          = {2023},
  month         = {08},
  eprint        = {2308.10278},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2308.10278},
  environments  = {implicit_objectives, text},
  agents        = {prompting_and_in_context_learning, two_agents, agents_with_memory, agents_with_personas},
  evaluation    = {model_based, human},
  other         = {simulated_humans},
}

@misc{zhang2023agentcf,
  title         = {AgentCF: Collaborative Learning with Autonomous Language Agents for Recommender Systems},
  author        = {Junjie Zhang and Yupeng Hou and Ruobing Xie and Wenqi Sun and Julian McAuley and Wayne Xin Zhao and Leyu Lin and Ji-Rong Wen},
  year          = {2023},
  month         = {10},
  eprint        = {2310.09233},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  url           = {https://arxiv.org/abs/2310.09233},
  environments  = {mixed_objectives, text},
  agents        = {prompting_and_in_context_learning, more_than_three_agents, agents_with_memory, agents_with_personas},
  evaluation    = {rule_based},
  other         = {simulated_humans},
}

@misc{huang2024far,
  title         = {How Far Are We on the Decision-Making of LLMs? Evaluating LLMs' Gaming Ability in Multi-Agent Environments},
  author        = {Jen-tse Huang and Eric John Li and Man Ho Lam and Tian Liang and Wenxuan Wang and Youliang Yuan and Wenxiang Jiao and Xing Wang and Zhaopeng Tu and Michael R. Lyu},
  year          = {2024},
  month         = {3},
  eprint        = {2403.11807},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2403.11807},
  environments  = {mixed_objectives, text},
  agents        = {prompting_and_in_context_learning, more_than_three_agents},
  evaluation    = {rule_based},
  other         = {more_omniscient},
}

@misc{chan2023chateval,
  title         = {ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate},
  author        = {Chi-Min Chan and Weize Chen and Yusheng Su and Jianxuan Yu and Wei Xue and Shanghang Zhang and Jie Fu and Zhiyuan Liu},
  year          = {2023},
  month         = {8},
  eprint        = {2308.07201},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2308.07201},
  environments  = {collaboration, text},
  agents        = {prompting_and_in_context_learning, more_than_three_agents},
  evaluation    = {model_based},
  other         = {n/a},
}

@misc{li2024automatic,
  title         = {Automatic Evaluation for Mental Health Counseling using LLMs},
  author        = {Anqi Li and Yu Lu and Nirui Song and Shuai Zhang and Lizhi Ma and Zhenzhong Lan},
  year          = {2024},
  month         = {2},
  eprint        = {2402.11958},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2402.11958},
  environments  = {collaboration, text},
  agents        = {prompting_and_in_context_learning, two_agents},
  evaluation    = {model_based},
  other         = {n/a},
}

@misc{bianchi2024llms,
  title         = {How Well Can LLMs Negotiate? NegotiationArena Platform and Analysis},
  author        = {Federico Bianchi and Patrick John Chia and Mert Yuksekgonul and Jacopo Tagliabue and Dan Jurafsky and James Zou},
  year          = {2024},
  month         = {2},
  eprint        = {2402.05863},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2402.05863},
  environments  = {mixed_objectives, text},
  agents        = {prompting_and_in_context_learning, two_agents},
  evaluation    = {rule_based},
  other         = {more_information_asymmetrical},
}

@inproceedings{Jiang2023PersonaLLMIT,
  title        = {PersonaLLM: Investigating the Ability of Large Language Models to Express Personality Traits},
  author       = {Hang Jiang and Xiajie Zhang and Xubo Cao and Cynthia Breazeal and Deb Roy and Jad Kabbara},
  booktitle    = {NAACL Findings},
  year         = {2023},
  month        = {5},
  url          = {https://api.semanticscholar.org/CorpusID:268032940},
  environments = {text},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {human, model_based},
  other        = {n/a},
}

@article{Xie2024CanLL,
  title={Can Large Language Model Agents Simulate Human Trust Behaviors?},
  author={Chengxing Xie and Canyu Chen and Feiran Jia and Ziyu Ye and Kai Shu and Adel Bibi and Ziniu Hu and Philip H. S. Torr and Bernard Ghanem and Guohao Li},
  journal={arXiv preprint arXiv:2402.04559},
  year={2024},
  month={2},
  eprint={2402.04559},
  archivePrefix={arXiv},
  url={https://api.semanticscholar.org/CorpusID:267523076},
  environments = {text},
  agents = {prompting_and_in_context_learning},
  evaluation = {human, model_based},
  other = {n/a}
}

@article{Rasal2024LLMHM,
  title={LLM Harmony: Multi-Agent Communication for Problem Solving},
  author={Sumedh Rasal},
  journal={arXiv preprint arXiv:2401.01312},
  year={2024},
  month={1},
  eprint={2401.01312},
  archivePrefix={arXiv},
  url={https://api.semanticscholar.org/CorpusID:266725580},
  environments = {text},
  agents = {prompting_and_in_context_learning},
  evaluation = {human, model_based},
  other = {n/a}
}

@inproceedings{yeh-etal-2021-comprehensive,
  title        = {A Comprehensive Assessment of Dialog Evaluation Metrics},
  author       = {Yeh, Yi-Ting and Eskenazi, Maxine and Mehri, Shikib},
  editor       = {Wei, Wei and Dai, Bo and Zhao, Tuo and Li, Lihong and Yang, Diyi and Chen, Yun-Nung and Boureau, Y-Lan and Celikyilmaz, Asli and Geramifard, Alborz and Ahuja, Aman and Jiang, Haoming},
  booktitle    = {The First Workshop on Evaluations and Assessments of Neural Conversation Systems},
  month        = nov,
  year         = {2021},
  address      = {Online},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2021.eancs-1.3},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human, model_based, rule_based},
  other        = {n/a},
}

@inproceedings{chang-etal-2020-convokit,
  title        = {{C}onvo{K}it: A Toolkit for the Analysis of Conversations},
  author       = {Chang, Jonathan P. and Chiam, Caleb and Fu, Liye and Wang, Andrew and Zhang, Justine and Danescu-Niculescu-Mizil, Cristian},
  editor       = {Pietquin, Olivier and Muresan, Smaranda and Chen, Vivian and Kennington, Casey and Vandyke, David and Dethlefs, Nina and Inoue, Koji and Ekstedt, Erik and Ultes, Stefan},
  booktitle    = {Proceedings of the 21th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  month        = jul,
  year         = {2020},
  address      = {1st virtual meeting},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2020.sigdial-1.8},
  doi          = {10.18653/v1/2020.sigdial-1.8},
  pages        = {57--60},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human, model_based, rule_based},
  other        = {n/a},
}

@misc{giorgi2023psychological,
      title={Psychological Metrics for Dialog System Evaluation},
      author={Salvatore Giorgi and Shreya Havaldar and Farhan Ahmed and Zuhaib Akhtar and Shalaka Vaidya and Gary Pan and Lyle H. Ungar and H. Andrew Schwartz and Joao Sedoc},
      year={2023},
      month={5},
      environments = {text},
      agents = {two_agents},
      evaluation = {human, rule_based},
      other = {human_agent},
      url = {https://arxiv.org/abs/2305.14757},
      eprint={2305.14757},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}


@misc{ghazarian2023accent,
      title={ACCENT: An Automatic Event Commonsense Evaluation Metric for Open-Domain Dialogue Systems},
      author={Sarik Ghazarian and Yijia Shao and Rujun Han and Aram Galstyan and Nanyun Peng},
      year={2023},
      month={5},
      environments = {text},
      agents = {n/a},
      evaluation = {human, model_based},
      other = {n/a},
      url = {https://arxiv.org/pdf/2305.07797},
      eprint={2305.07797},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

@inproceedings{huang-etal-2020-grade,
  title        = {{GRADE}: Automatic Graph-Enhanced Coherence Metric for Evaluating Open-Domain Dialogue Systems},
  author       = {Huang, Lishan and Ye, Zheng and Qin, Jinghui and Lin, Liang and Liang, Xiaodan},
  editor       = {Webber, Bonnie and Cohn, Trevor and He, Yulan and Liu, Yang},
  booktitle    = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  month        = nov,
  year         = {2020},
  address      = {Online},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2020.emnlp-main.742},
  doi          = {10.18653/v1/2020.emnlp-main.742},
  pages        = {9230--9240},
  abstract     = {Automatically evaluating dialogue coherence is a challenging but high-demand ability for developing high-quality open-domain dialogue systems. However, current evaluation metrics consider only surface features or utterance-level semantics, without explicitly considering the fine-grained topic transition dynamics of dialogue flows. Here, we first consider that the graph structure constituted with topics in a dialogue can accurately depict the underlying communication logic, which is a more natural way to produce persuasive metrics. Capitalized on the topic-level dialogue graph, we propose a new evaluation metric GRADE, which stands for Graph-enhanced Representations for Automatic Dialogue Evaluation. Specifically, GRADE incorporates both coarse-grained utterance-level contextualized representations and fine-grained topic-level graph representations to evaluate dialogue coherence. The graph representations are obtained by reasoning over topic-level dialogue graphs enhanced with the evidence from a commonsense graph, including k-hop neighboring representations and hop-attention weights. Experimental results show that our GRADE significantly outperforms other state-of-the-art metrics on measuring diverse dialogue models in terms of the Pearson and Spearman correlations with human judgments. Besides, we release a new large-scale human evaluation benchmark to facilitate future research on automatic metrics.},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human, model_based},
  other        = {n/a},
}

@inproceedings{mehri-eskenazi-2020-unsupervised,
  title        = {Unsupervised Evaluation of Interactive Dialog with {D}ialo{GPT}},
  author       = {Mehri, Shikib and Eskenazi, Maxine},
  editor       = {Pietquin, Olivier and Muresan, Smaranda and Chen, Vivian and Kennington, Casey and Vandyke, David and Dethlefs, Nina and Inoue, Koji and Ekstedt, Erik and Ultes, Stefan},
  booktitle    = {Proceedings of the 21th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  month        = jul,
  year         = {2020},
  address      = {1st virtual meeting},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2020.sigdial-1.28},
  doi          = {10.18653/v1/2020.sigdial-1.28},
  pages        = {225--235},
  abstract     = {It is important to define meaningful and interpretable automatic evaluation metrics for open-domain dialog research. Standard language generation metrics have been shown to be ineffective for dialog. This paper introduces the FED metric (fine-grained evaluation of dialog), an automatic evaluation metric which uses DialoGPT, without any fine-tuning or supervision. It also introduces the FED dataset which is constructed by annotating a set of human-system and human-human conversations with eighteen fine-grained dialog qualities. The FED metric (1) does not rely on a ground-truth response, (2) does not require training data and (3) measures fine-grained dialog qualities at both the turn and whole dialog levels. FED attains moderate to strong correlation with human judgement at both levels.},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human, model_based},
  other        = {n/a},
}

#### Evaluating embodied social agents
@article{evaluation/embodied,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@inproceedings{min-etal-2022-dont,
  title        = {Don{'}t Copy the Teacher: Data and Model Challenges in Embodied Dialogue},
  author       = {Min, So Yeon and Zhu, Hao and Salakhutdinov, Ruslan and Bisk, Yonatan},
  editor       = {Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue},
  booktitle    = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  month        = dec,
  year         = {2022},
  address      = {Abu Dhabi, United Arab Emirates},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2022.emnlp-main.635},
  doi          = {10.18653/v1/2022.emnlp-main.635},
  pages        = {9361--9368},
  abstract     = {Embodied dialogue instruction following requires an agent to complete a complex sequence of tasks from a natural language exchange. The recent introduction of benchmarks raises the question of how best to train and evaluate models for this multi-turn, multi-agent, long-horizon task. This paper contributes to that conversation, by arguing that imitation learning (IL) and related low-level metrics are actually misleading and do not align with the goals of embodied dialogue research and may hinder progress. We provide empirical comparisons of metrics, analysis of three models, and make suggestions for how the field might best progress. First, we observe that models trained with IL take spurious actions during evaluation. Second, we find that existing models fail to ground query utterances, which are essential for task completion. Third, we argue evaluation should focus on higher-level semantic goals. We will release code to additionally filter the data and benchmark models for improved evaluation.},
  environments = {mixed_objectives, collaboration, embodied},
  agents       = {agents_with_memory},
  evaluation   = {qualitative, human, rule_based},
  other        = {n/a},
}

@misc{guo2024embodied,
  title         = {Embodied LLM Agents Learn to Cooperate in Organized Teams},
  author        = {Xudong Guo and Kaixuan Huang and Jiale Liu and Wenhui Fan and Natalia Vélez and Qingyun Wu and Huazheng Wang and Thomas L. Griffiths and Mengdi Wang},
  year          = {2024},
  month         = {3},
  eprint        = {2403.12482},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2403.12482},
  environments  = {collaboration, embodied},
  agents        = {prompting_and_in_context_learning, more_than_three_agents},
  evaluation    = {model_based, human},
  other         = {education},
}


@article{10.1145/3476413,
author = {Biswas, Abhijat and Wang, Allan and Silvera, Gustavo and Steinfeld, Aaron and Admoni, Henny},
title = {SocNavBench: A Grounded Simulation Testing Framework for Evaluating Social Navigation},
year = {2022},
month={2},
url = {https://doi.org/10.1145/3476413},
doi = {10.1145/3476413},
abstract = {The human-robot interaction community has developed many methods for robots to navigate safely and socially alongside humans. However, experimental procedures to evaluate these works are usually constructed on a per-method basis. Such disparate evaluations make it difficult to compare the performance of such methods across the literature. To bridge this gap, we introduce SocNavBench, a simulation framework for evaluating social navigation algorithms. SocNavBench comprises a simulator with photo-realistic capabilities and curated social navigation scenarios grounded in real-world pedestrian data. We also provide an implementation of a suite of metrics to quantify the performance of navigation algorithms on these scenarios. Altogether, SocNavBench provides a test framework for evaluating disparate social navigation methods in a consistent and interpretable manner. To illustrate its use, we demonstrate testing three existing social navigation methods and a baseline method on SocNavBench, showing how the suite of metrics helps infer their performance trade-offs. Our code is open-source, allowing the addition of new scenarios and metrics by the community to help evolve SocNavBench to reflect advancements in our understanding of social navigation.},
journal = {J. Hum.-Robot Interact.},
      environments={implicit_objectives, embodied},
      agents={reinforcement_learning},
      evaluation={rule_based},
      other={simulated_humans}
}


@inproceedings{10.5555/3463952.3464159,
author = {Knott, Paul and Carroll, Micah and Devlin, Sam and Ciosek, Kamil and Hofmann, Katja and Dragan, Anca and Shah, Rohin},
title = {Evaluating the Robustness of Collaborative Agents},
year = {2021},
month={1},
url={https://dl.acm.org/doi/abs/10.5555/3463952.3464159},
isbn = {9781450383073},
publisher = {International Foundation for Autonomous Agents and Multiagent Systems},
address = {Richland, SC},
abstract = {Artificial agents trained by deep reinforcement learning will likely encounter novel situations after deployment that were never seen during training. Our agent must be robust to handle such situations well. However, if we cannot rely on the average training or validation reward as a metric, then how can we effectively evaluate robustness? We take inspiration from the practice of unit testing in software engineering. Specifically, we suggest that when designing AI agents that collaborate with humans, designers should search for potential edge cases in possible partner behavior and possible states encountered, and write tests which check that the behavior of the agent in these edge cases is reasonable. We apply this methodology to build a suite of unit tests for the Overcooked-AI environment, and use this test suite to evaluate three proposals for improving robustness. We find that the test suite provides significant insight into the effects of these proposals that were generally not revealed by looking solely at the average validation reward. For our full paper, see https://arxiv.org/abs/2101.05507},
booktitle = {Proceedings of the 20th International Conference on Autonomous Agents and MultiAgent Systems},
pages = {1560--1562},
numpages = {3},
keywords = {human-AI collaboration, multi-agent RL, robustness},
location = {Virtual Event, United Kingdom},
series = {AAMAS '21},
      environments={collaboration, embodied},
      agents={reinforcement_learning},
      evaluation={rule_based},
      other={n/a}
}


@misc{singh2024change,
      title={How much can change in a year? Revisiting Evaluation in Multi-Agent Reinforcement Learning},
      author={Siddarth Singh and Omayma Mahjoub and Ruan de Kock and Wiem Khlifi and Abidine Vall and Kale-ab Tessera and Arnu Pretorius},
      year={2023},
      month={12},
      eprint={2312.08463},
      archivePrefix={arXiv},
      primaryClass={cs.AI},
      environments = {collaboration, embodied},
      agents = {reinforcement_learning, more_than_three_agents},
      evaluation = {rule_based},
      other = {n/a},
      url = {https://arxiv.org/abs/2312.08463}
}

@inproceedings{leibo2021scalable,
  title={Scalable Evaluation of Multi-Agent Reinforcement Learning with Melting Pot},
  author={Leibo, Joel Z and Due{\~n}ez-Guzman, Edgar A and Vezhnevets, Alexander and Agapiou, John P and Sunehag, Peter and Koster, Raphael and Matyas, Jayd and Beattie, Charlie and Mordatch, Igor and Graepel, Thore},
  booktitle={International Conference on Machine Learning},
  pages={6187--6199},
  year={2021},
  publisher={PMLR},
  environments = {collaboration, competition, mixed_objectives, embodied},
  agents = {reinforcement_learning, more_than_three_agents},
  evaluation = {rule_based},
  other = {n/a},
  month = {7},
  url={https://proceedings.mlr.press/v139/leibo21a.html}
}

@inproceedings{bara-etal-2021-mindcraft,
  author       = {Bara, Cristian-Paul and CH-Wang, Sky and Chai, Joyce},
  title        = {{M}ind{C}raft: Theory of Mind Modeling for Situated Dialogue in Collaborative Tasks},
  booktitle    = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  pages        = {1112--1125},
  year         = {2021},
  month        = {11},
  address      = {Online and Punta Cana, Dominican Republic},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2021.emnlp-main.85},
  abstract     = {An ideal integration of autonomous agents in a human world implies that they are able to collaborate on human terms. In particular, theory of mind plays an important role in maintaining common ground during human collaboration and communication. To enable theory of mind modeling in situated interactions, we introduce a fine-grained dataset of collaborative tasks performed by pairs of human subjects in the 3D virtual blocks world of Minecraft. It provides information that captures partners{'} beliefs of the world and of each other as an interaction unfolds, bringing abundant opportunities to study human collaborative behaviors in situated language communication. As a first step towards our goal of developing embodied AI agents able to infer belief states of collaborative partners in situ, we build and present results on computational models for several theory of mind tasks.},
  environments = {collaboration, embodied},
  agents       = {finetuning, two_agents},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@inproceedings{narayan-chen-etal-2019-collaborative,
  author       = {Narayan-Chen, Anjali and Jayannavar, Prashant and Hockenmaier, Julia},
  editor       = {Korhonen, Anna and Traum, David and M{\`a}rquez, Llu{\'\i}s},
  title        = {Collaborative Dialogue in {M}inecraft},
  booktitle    = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  pages        = {5405--5415},
  year         = {2019},
  month        = {7},
  address      = {Florence, Italy},
  publisher    = {Association for Computational Linguistics},
  doi          = {10.18653/v1/P19-1537},
  url          = {https://aclanthology.org/P19-1537},
  abstract     = {We wish to develop interactive agents that can communicate with humans to collaboratively solve tasks in grounded scenarios. Since computer games allow us to simulate such tasks without the need for physical robots, we define a Minecraft-based collaborative building task in which one player (A, the Architect) is shown a target structure and needs to instruct the other player (B, the Builder) to build this structure. Both players interact via a chat interface. A can observe B but cannot place blocks. We present the Minecraft Dialogue Corpus, a collection of 509 conversations and game logs. As a first step towards our goal of developing fully interactive agents for this task, we consider the subtask of Architect utterance generation, and show how challenging it is.},
  environments = {collaboration, embodied},
  agents       = {two_agents},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@inproceedings{ichikawa-higashinaka-2022-analysis,
  author       = {Ichikawa, Takuma and Higashinaka, Ryuichiro},
  editor       = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Odijk, Jan and Piperidis, Stelios},
  title        = {Analysis of Dialogue in Human-Human Collaboration in {M}inecraft},
  booktitle    = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
  pages        = {4051--4059},
  year         = {2022},
  month        = {6},
  address      = {Marseille, France},
  publisher    = {European Language Resources Association},
  url          = {https://aclanthology.org/2022.lrec-1.431},
  abstract     = {Recently, many studies have focused on developing dialogue systems that enable collaborative work; however, they rarely focus on creative tasks. Collaboration for creative work, in which humans and systems collaborate to create new value, will be essential for future dialogue systems. In this study, we collected 500 dialogues of human-human collaboration in Minecraft as a basis for developing a dialogue system that enables creative collaborative work. We conceived the Collaborative Garden Task, where two workers interact and collaborate in Minecraft to create a garden, and we collected dialogue, action logs, and subjective evaluations. We also collected third-person evaluations of the gardens and analyzed the relationship between dialogue and collaborative work that received high scores on the subjective and third-person evaluations in order to identify dialogic factors for high-quality collaborative work. We found that two essential aspects in creative collaborative work are performing more processes to ask for and agree on suggestions between workers and agreeing on a particular image of the final product in the early phase of work and then discussing changes and details.},
  environments = {collaboration, embodied},
  agents       = {two_agents},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@inproceedings{CordialSync,
  author       = {Jain, Unnat and Weihs, Luca and Kolve, Eric and Farhadi, Ali and Lazebnik, Svetlana and Kembhavi, Aniruddha and Schwing, Alexander G.},
  title        = {A Cordial Sync: Going Beyond Marginal Policies For Multi-Agent Embodied Tasks},
  booktitle    = {ECCV},
  year         = {2020},
  month        = {11},
  note         = {first two authors contributed equally},
  url          = {https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123500460.pdf},
  environments = {collaboration, embodied},
  agents       = {finetuning, reinforcement_learning, two_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{TwoBody,
  author       = {Jain, Unnat and Weihs, Luca and Kolve, Eric and Rastegari, Mohammad and Lazebnik, Svetlana and Farhadi, Ali and Schwing, Alexander G. and Kembhavi, Aniruddha},
  title        = {Two Body Problem: Collaborative Visual Task Completion},
  booktitle    = {CVPR},
  year         = {2019},
  month        = {6},
  note         = {first two authors contributed equally},
  url          = {https://openaccess.thecvf.com/content_CVPR_2019/papers/Jain_Two_Body_Problem_Collaborative_Visual_Task_Completion_CVPR_2019_paper.pdf},
  environments = {collaboration, embodied},
  agents       = {finetuning, reinforcement_learning, two_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{teach,
  author       = {Padmakumar, Aishwarya and Thomason, Jesse and Shrivastava, Ayush and Lange, Patrick and Narayan-Chen, Anjali and Gella, Spandana and Piramuthu, Robinson and Tur, Gokhan and Hakkani-Tur, Dilek},
  title        = {{TEACh}: Task-driven Embodied Agents that Chat},
  booktitle    = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume       = {36},
  number       = {2},
  pages        = {2017--2025},
  year         = {2022},
  month        = {2},
  url          = {https://arxiv.org/abs/2110.00534},
  environments = {collaboration, embodied},
  agents       = {finetuning, two_agents},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@inproceedings{teachda,
  author       = {Gella, Spandana and Padmakumar, Aishwarya and Lange, Patrick and Hakkani-Tur, Dilek},
  title        = {Dialog Acts for Task-Driven Embodied Agents},
  booktitle    = {Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue (SIGDial)},
  pages        = {111--123},
  year         = {2022},
  month        = {9},
  url          = {https://aclanthology.org/2022.sigdial-1.13},
  environments = {collaboration, embodied},
  agents       = {finetuning, two_agents},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@inproceedings{kim2016evaluation,
  author       = {Kim, Man-Je and Kim, Kyung-Joong and Kim, SeungJun and Dey, Anind K},
  title        = {Evaluation of starcraft artificial intelligence competition bots by experienced human players},
  booktitle    = {Proceedings of the 2016 CHI Conference Extended Abstracts on Human Factors in Computing Systems},
  pages        = {1915--1921},
  year         = {2016},
  month        = {5},
  url          = {https://dl.acm.org/doi/pdf/10.1145/2851581.2892305},
  environments = {collaboration, competition, mixed_objectives, embodied},
  agents       = {more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

#### Evaluating virtual social agents
@article{evaluation/virtual,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@inproceedings{fitrianie2022artificial,
  author       = {Fitrianie, Siska and Bruijnes, Merijn and Li, Fengxiang and Abdulrahman, Amal and Brinkman, Willem-Paul},
  title        = {The Artificial-Social-Agent Questionnaire: Establishing the long and short questionnaire versions},
  booktitle    = {Proceedings of the 22nd ACM International Conference on Intelligent Virtual Agents},
  pages        = {1--8},
  year         = {2022},
  month        = {1},
  url          = {https://dl.acm.org/doi/abs/10.1145/3514197.3549612},
  environments = {text, virtual, embodied, robotics},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@inproceedings{fitrianie202019,
  author       = {Fitrianie, Siska and Bruijnes, Merijn and Richards, Deborah and B{\"o}nsch, Andrea and Brinkman, Willem-Paul},
  title        = {The 19 unifying questionnaire constructs of artificial social agents: An iva community analysis},
  booktitle    = {Proceedings of the 20th ACM International Conference on Intelligent Virtual Agents},
  pages        = {1--8},
  year         = {2020},
  month        = {1},
  url          = {https://dl.acm.org/doi/10.1145/3383652.3423873},
  environments = {text, virtual, embodied, robotics},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@incollection{paiva2021empathy,
  author       = {Paiva, Ana and Correia, Filipa and Oliveira, Raquel and Santos, Fernando and Arriaga, Patr{\'\i}cia},
  title        = {Empathy and prosociality in social agents},
  booktitle    = {The Handbook on Socially Interactive Agents: 20 Years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 1: Methods, Behavior, Cognition},
  pages        = {385--432},
  year         = {2021},
  month        = {1},
  url          = {https://dl.acm.org/doi/10.1145/3477322.3477334},
  environments = {text, virtual, embodied, robotics},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@article{Lù2024WebLINXRW,
  author       = {Xing Han Lù and Zdeněk Kasner and Siva Reddy},
  title        = {WebLINX: Real-World Website Navigation with Multi-Turn Dialogue},
  booktitle    = {arXiv.org},
  journal      = {ArXiv},
  volume       = {abs/2402.05930},
  year         = {2024},
  month        = {2},
  url          = {https://arxiv.org/abs/2402.05930},
  environments = {virtual},
  agents       = {prompting_and_in_context_learning, finetuning},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@article{Xie2023OpenAgentsAO,
  author       = {Tianbao Xie and Fan Zhou and Zhoujun Cheng and Peng Shi and Luoxuan Weng and Yitao Liu and Toh Jing Hua and Junning Zhao and Qian Liu and Che Liu and Leo Z. Liu and Yiheng Xu and Hongjin Su and Dongchan Shin and Caiming Xiong and Tao Yu},
  title        = {OpenAgents: An Open Platform for Language Agents in the Wild},
  journal      = {ArXiv},
  volume       = {abs/2310.10634},
  year         = {2023},
  month        = {10},
  url          = {https://api.semanticscholar.org/CorpusID:264172893},
  environments = {virtual, collaboration},
  agents       = {n/a},
  evaluation   = {qualitative},
  other        = {n/a},
}

@misc{mialon2023gaia,
  author        = {Grégoire Mialon and Clémentine Fourrier and Craig Swift and Thomas Wolf and Yann LeCun and Thomas Scialom},
  title         = {GAIA: a benchmark for General AI Assistants},
  year          = {2023},
  month         = {11},
  eprint        = {2311.12983},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2311.12983},
  environments  = {virtual, collaboration},
  agents        = {n/a},
  evaluation    = {rule_based},
  other         = {fully_omniscient},
}

@article{Sumers2023CognitiveAF,
  author       = {Theodore R. Sumers and Shunyu Yao and Karthik Narasimhan and Thomas L. Griffiths},
  title        = {Cognitive Architectures for Language Agents},
  journal      = {ArXiv},
  volume       = {abs/2309.02427},
  year         = {2023},
  month        = {9},
  url          = {https://api.semanticscholar.org/CorpusID:261556862},
  environments = {n/a},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {n/a},
}

@article{Hong2023MetaGPTMP,
  author       = {Sirui Hong and Mingchen Zhuge and Jonathan Chen and Xiawu Zheng and Yuheng Cheng and Ceyao Zhang and Jinlin Wang and Zili Wang and Steven Ka Shing Yau and Zi Hen Lin and Liyang Zhou and Chenyu Ran and Lingfeng Xiao and Chenglin Wu and J{\"u}rgen Schmidhuber},
  title        = {MetaGPT: Meta Programming for A Multi-Agent Collaborative Framework},
  journal      = {ArXiv},
  year         = {2023},
  month        = {11},
  url          = {https://api.semanticscholar.org/CorpusID:265301950},
  environments = {virtual, collaboration},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {more_omniscient},
}

@inproceedings{10.1145/3290605.3300511,
  author       = {Wang, Isaac and Smith, Jesse and Ruiz, Jaime},
  title        = {Exploring Virtual Agents for Augmented Reality},
  booktitle    = {CHI},
  pages        = {1--12},
  numpages     = {12},
  year         = {2019},
  month        = {5},
  isbn         = {9781450359702},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  location     = {Glasgow, Scotland UK},
  keywords     = {embodied conversational agents, augmented reality},
  doi          = {10.1145/3290605.3300511},
  url          = {https://doi.org/10.1145/3290605.3300511},
  environments = {virtual, collaboration},
  agents       = {n/a},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@inproceedings{10.1145/3374920.3374956,
  author       = {Reinhardt, Jens and Hillen, Luca and Wolf, Katrin},
  title        = {Embedding Conversational Agents into AR: Invisible or with a Realistic Human Body?},
  booktitle    = {Proceedings of the Fourteenth International Conference on Tangible, Embedded, and Embodied Interaction},
  series       = {TEI '20},
  pages        = {299--310},
  numpages     = {12},
  year         = {2020},
  month        = {2},
  isbn         = {9781450361071},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  location     = {Sydney NSW, Australia},
  keywords     = {intelligent virtual assistants, embodied conversational agents, avatars, augmented reality},
  doi          = {10.1145/3374920.3374956},
  url          = {https://doi.org/10.1145/3374920.3374956},
  environments = {virtual, collaboration},
  agents       = {two_agents},
  evaluation   = {human},
  other        = {human_agent},
}

@inproceedings{zadeh2018multimodal,
  author       = {Zadeh, AmirAli Bagher and Liang, Paul Pu and Poria, Soujanya and Cambria, Erik and Morency, Louis-Philippe},
  title        = {Multimodal language analysis in the wild: Cmu-mosei dataset and interpretable dynamic fusion graph},
  booktitle    = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  pages        = {2236--2246},
  year         = {2018},
  month        = {7},
  url          = {https://aclanthology.org/P18-1208/},
  environments = {text, virtual},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@inproceedings{zadeh2019social,
  author       = {Zadeh, Amir and Chan, Michael and Liang, Paul Pu and Tong, Edmund and Morency, Louis-Philippe},
  title        = {Social-iq: A question answering benchmark for artificial social intelligence},
  booktitle    = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages        = {8807--8817},
  year         = {2019},
  month        = {6},
  url          = {https://openaccess.thecvf.com/content_CVPR_2019/html/Zadeh_Social-IQ_A_Question_Answering_Benchmark_for_Artificial_Social_Intelligence_CVPR_2019_paper.html},
  environments = {text, virtual},
  agents       = {more_than_three_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@misc{chen2024chatshop,
  author        = {Sanxing Chen and Sam Wiseman and Bhuwan Dhingra},
  title         = {ChatShop: Interactive Information Seeking with Language Agents},
  year          = {2024},
  month         = {4},
  eprint        = {2404.09911},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2404.09911},
  environments  = {virtual},
  agents        = {prompting_and_in_context_learning},
  evaluation    = {rule_based},
  other         = {human_agent},
}


#### Evaluating robotics in social contexts
@article{evaluation/robotics,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}


@misc{sferrazza2024humanoidbench,
  author        = {Carmelo Sferrazza and Dun-Ming Huang and Xingyu Lin and Youngwoon Lee and Pieter Abbeel},
  title         = {HumanoidBench: Simulated Humanoid Benchmark for Whole-Body Locomotion and Manipulation},
  year          = {2024},
  month         = {3},
  eprint        = {2403.10506},
  archivePrefix = {arXiv},
  primaryClass  = {cs.RO},
  url           = {https://arxiv.org/abs/2403.10506},
  environments  = {collaboration, mixed_objectives, robotics},
  agents        = {reinforcement_learning},
  evaluation    = {human, model_based},
  other         = {human_agent, simulated_humans},
}

@inproceedings{1174284,
  author       = {Scholtz, J.},
  title        = {Theory and evaluation of human robot interactions},
  booktitle    = {36th Annual Hawaii International Conference on System Sciences, 2003. Proceedings of the},
  pages        = {10 pp.-},
  year         = {2003},
  month        = {1},
  keywords     = {Human robot interaction;Robot sensing systems;Mobile robots;Software architecture;NIST;Feeds;User interfaces;Computer architecture;Human computer interaction;Man machine systems},
  doi          = {10.1109/HICSS.2003.1174284},
  url          = {https://ieeexplore.ieee.org/document/1174284},
  environments = {collaboration, mixed_objectives, robotics},
  agents       = {reinforcement_learning},
  evaluation   = {human},
  other        = {human_agent},
}
  
@inproceedings{10.1145/1121241.1121249,
  author       = {Steinfeld, Aaron and Fong, Terrence and Kaber, David and Lewis, Michael and Scholtz, Jean and Schultz, Alan and Goodrich, Michael},
  title        = {Common metrics for human-robot interaction},
  booktitle    = {Proceedings of the 1st ACM SIGCHI/SIGART Conference on Human-Robot Interaction},
  series       = {HRI '06},
  pages        = {33--40},
  numpages     = {8},
  year         = {2006},
  month        = {3},
  isbn         = {1595932941},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  location     = {Salt Lake City, Utah, USA},
  keywords     = {unmanned ground vehicles, metrics, human-robot interaction},
  doi          = {10.1145/1121241.1121249},
  url          = {https://doi.org/10.1145/1121241.1121249},
  abstract     = {This paper describes an effort to identify common metrics for task-oriented human-robot interaction (HRI). We begin by discussing the need for a toolkit of HRI metrics. We then describe the framework of our work and identify important biasing factors that must be taken into consideration. Finally, we present suggested common metrics for standardization and a case study. Preparation of a larger, more detailed toolkit is in progress.},
  environments = {collaboration, mixed_objectives, robotics},
  agents       = {reinforcement_learning},
  evaluation   = {human, rule_based, model_based},
  other        = {human_agent},
}

@article{zacharaki2020safety,
  author       = {Zacharaki, Angeliki and Kostavelis, Ioannis and Gasteratos, Antonios and Dokas, Ioannis},
  title        = {Safety bounds in human robot interaction: A survey},
  journal      = {Safety science},
  volume       = {127},
  pages        = {104667},
  year         = {2020},
  month        = {7},
  publisher    = {Elsevier},
  url          = {https://www.sciencedirect.com/science/article/pii/S0925753520300643},
  environments = {collaboration, mixed_objectives, robotics},
  agents       = {reinforcement_learning},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@article{hancock2011meta,
  author       = {Hancock, Peter A and Billings, Deborah R and Schaefer, Kristin E and Chen, Jessie YC and De Visser, Ewart J and Parasuraman, Raja},
  title        = {A meta-analysis of factors affecting trust in human-robot interaction},
  journal      = {Human factors},
  volume       = {53},
  number       = {5},
  pages        = {517--527},
  year         = {2011},
  month        = {10},
  publisher    = {Sage Publications Sage CA: Los Angeles, CA},
  url          = {https://journals.sagepub.com/doi/10.1177/0018720811417254},
  environments = {collaboration, mixed_objectives, robotics},
  agents       = {reinforcement_learning},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@article{bartneck2009measurement,
  author       = {Bartneck, Christoph and Kuli{\'c}, Dana and Croft, Elizabeth and Zoghbi, Susana},
  title        = {Measurement instruments for the anthropomorphism, animacy, likeability, perceived intelligence, and perceived safety of robots},
  journal      = {International journal of social robotics},
  volume       = {1},
  pages        = {71--81},
  year         = {2009},
  month        = {11},
  publisher    = {Springer},
  url          = {https://link.springer.com/article/10.1007/s12369-008-0001-3},
  environments = {collaboration, mixed_objectives, robotics},
  agents       = {reinforcement_learning},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@article{iocchi2015robocup,
  author       = {Iocchi, Luca and Holz, Dirk and Ruiz-del-Solar, Javier and Sugiura, Komei and Van Der Zant, Tijn},
  title        = {RoboCup@Home: Analysis and results of evolving competitions for domestic and service robots},
  journal      = {Artificial Intelligence},
  volume       = {229},
  pages        = {258--281},
  year         = {2015},
  month        = {12},
  publisher    = {Elsevier},
  url          = {https://www.sciencedirect.com/science/article/pii/S0004370215001174},
  environments = {collaboration, mixed_objectives, robotics},
  agents       = {reinforcement_learning},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@article{doi:10.1080/01691864.2019.1698462,
  author       = {Y. Mizuchi and T. Inamura},
  title        = {Optimization of criterion for objective evaluation of HRI performance that approximates subjective evaluation: a case study in robot competition},
  journal      = {Advanced Robotics},
  volume       = {34},
  number       = {3-4},
  pages        = {142--156},
  year         = {2020},
  month        = {12},
  publisher    = {Taylor \& Francis},
  doi          = {10.1080/01691864.2019.1698462},
  url          = {https://doi.org/10.1080/01691864.2019.1698462},
  environments = {competition, mixed_objectives, robotics},
  agents       = {reinforcement_learning},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}
### Interactions with humans

#### Human-Chatbot Interaction
@article{interactions/text,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@article{lin2023decision,
  author       = {Lin, Jessy and Tomlin, Nicholas and Andreas, Jacob and Eisner, Jason},
  title        = {Decision-oriented dialogue for human-ai collaboration},
  journal      = {arXiv preprint arXiv:2305.20076},
  year         = {2023},
  month        = {5},
  url          = {https://arxiv.org/abs/2305.20076},
  environments = {collaboration, text},
  agents       = {two_agents},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@inproceedings{rapp2023collaborating,
  author       = {Rapp, Amon and Boldi, Arianna and Curti, Lorenzo and Perrucci, Alessandro and Simeoni, Rossana},
  title        = {Collaborating with a Text-Based Chatbot: An Exploration of Real-World Collaboration Strategies Enacted during Human-Chatbot Interactions},
  booktitle    = {Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems},
  pages        = {1--17},
  year         = {2023},
  month        = {4},
  url          = {https://dl.acm.org/doi/pdf/10.1145/3544548.3580995},
  environments = {text},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {qualitative},
  other        = {human_agent},
}

@inproceedings{pillis2024ai,
  author       = {Pillis, Daniel and Pataranutaporn, Pat and Maes, Pattie and Sra, Misha},
  title        = {AI Comes Out of the Closet: Using AI-Generated Virtual Characters to Help Individuals Practice LGBTQIA+ Advocacy},
  booktitle    = {Proceedings of the 29th International Conference on Intelligent User Interfaces},
  pages        = {686--698},
  year         = {2024},
  month        = {3},
  url          = {https://dl.acm.org/doi/pdf/10.1145/3640543.3645213},
  environments = {text},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {qualitative},
  other        = {human_agent},
}

@inproceedings{lee2023exploring,
  author       = {Lee, Yi-Chieh and Cui, Yichao and Jamieson, Jack and Fu, Wayne and Yamashita, Naomi},
  title        = {Exploring effects of chatbot-based social contact on reducing mental illness stigma},
  booktitle    = {Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems},
  pages        = {1--16},
  year         = {2023},
  month        = {4},
  url          = {https://dl.acm.org/doi/pdf/10.1145/3544548.3581384},
  environments = {text},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {qualitative, human},
  other        = {human_agent, health},
}

@inproceedings{jang2024s,
  author       = {Jang, JiWoong and Moharana, Sanika and Carrington, Patrick and Begel, Andrew},
  title        = {"It's the only thing I can trust": Envisioning Large Language Model Use by Autistic Workers for Communication Assistance},
  booktitle    = {Proceedings of the 2024 CHI Conference on Human Factors in Computing Systems},
  journal      = {arXiv preprint arXiv:2403.03297},
  year         = {2024},
  month        = {3},
  url          = {https://arxiv.org/pdf/2403.03297.pdf},
  environments = {text},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {qualitative, human},
  other        = {human_agent},
}

@inproceedings{volkel2022user,
  author       = {V{\"o}lkel, Sarah Theres and Schoedel, Ramona and Kaya, Lale and Mayer, Sven},
  title        = {User perceptions of extraversion in chatbots after repeated use},
  booktitle    = {Proceedings of the 2022 CHI Conference on Human Factors in Computing Systems},
  pages        = {1--18},
  year         = {2022},
  month        = {4},
  url          = {https://dl.acm.org/doi/pdf/10.1145/3491102.3502058},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human},
  other        = {human_agent},
}

@article{kuhail2022interacting,
  author       = {Kuhail, Mohammad Amin and Thomas, Justin and Alramlawi, Salwa and Shah, Syed Jawad Hussain and Thornquist, Erik},
  title        = {Interacting with a chatbot-based advising system: Understanding the effect of chatbot personality and user gender on behavior},
  journal      = {Informatics},
  volume       = {9},
  number       = {4},
  pages        = {81},
  year         = {2022},
  month        = {9},
  publisher    = {MDPI},
  url          = {https://www.mdpi.com/2227-9709/9/4/81},
  environments = {text},
  agents       = {n/a},
  evaluation   = {qualitative, human},
  other        = {human_agent, education},
}

@inproceedings{kim2023effects,
  author       = {Kim, Hanseob and Han, Bin and Kim, Jieun and Syawaludin, Muhammad Firdaus and Kim, Gerard Jounghyun and Hwang, Jae-In},
  title        = {The Effects of Engaging and Affective Behaviors of Virtual Agents in Group Decision-Making},
  booktitle    = {Proceedings of the 2024 CHI Conference on Human Factors in Computing Systems},
  year         = {2023},
  month        = {5},
  url          = {https://arxiv.org/pdf/2308.10385.pdf},
  environments = {embodied},
  agents       = {n/a},
  evaluation   = {human},
  other        = {human_agent},
}

@inproceedings{qian2024take,
  author       = {Qian, Crystal and Wexler, James},
  title        = {Take It, Leave It, or Fix It: Measuring Productivity and Trust in Human-AI Collaboration},
  booktitle    = {Proceedings of the 29th International Conference on Intelligent User Interfaces},
  pages        = {370--384},
  year         = {2024},
  month        = {3},
  url          = {https://dl.acm.org/doi/pdf/10.1145/3640543.3645198},
  environments = {text},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {human, qualitative},
  other        = {human_agent},
}

@article{ali2023social,
  author       = {Fayaz Ali and Qingyu Zhang and Muhammad Zubair Tauni and Khuram Shahzad},
  title        = {Social Chatbot: My Friend in My Distress},
  journal      = {International Journal of Human–Computer Interaction},
  volume       = {40},
  number       = {7},
  pages        = {1702--1712},
  year         = {2023},
  month        = {1},
  publisher    = {Taylor \& Francis},
  doi          = {10.1080/10447318.2022.2150745},
  url          = {https://doi.org/10.1080/10447318.2022.2150745},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human},
  other        = {health},
}

@article{park2023empathy,
  author       = {Gain Park and Myungok Chris Yim and Jiyun Chung and Seyoung Lee},
  title        = {Effect of AI chatbot empathy and identity disclosure on willingness to donate: the mediation of humanness and social presence},
  journal      = {Behaviour \& Information Technology},
  volume       = {42},
  number       = {12},
  pages        = {1998--2010},
  year         = {2022},
  month        = {7},
  publisher    = {Taylor \& Francis},
  doi          = {10.1080/0144929X.2022.2105746},
  url          = {https://doi.org/10.1080/0144929X.2022.2105746},
  environments = {text},
  agents       = {two_agents},
  evaluation   = {human},
  other        = {human_agent},
}

@article{jeon2024exploring,
  author       = {Jaeho Jeon},
  title        = {Exploring AI chatbot affordances in the EFL classroom: young learners’ experiences and perspectives},
  journal      = {Computer Assisted Language Learning},
  volume       = {37},
  number       = {1-2},
  pages        = {1--26},
  year         = {2022},
  month        = {1},
  publisher    = {Routledge},
  doi          = {10.1080/09588221.2021.2021241},
  url          = {https://doi.org/10.1080/09588221.2021.2021241},
  environments = {text},
  agents       = {two_agents},
  evaluation   = {qualitative},
  other        = {human_agent, education},
}

@inproceedings{khurana2024why,
  author       = {Khurana, Anjali and Subramonyam, Hariharan and Chilana, Parmit K},
  title        = {Why and When LLM-Based Assistants Can Go Wrong: Investigating the Effectiveness of Prompt-Based Interactions for Software Help-Seeking},
  booktitle    = {Proceedings of the 29th International Conference on Intelligent User Interfaces},
  series       = {IUI '24},
  pages        = {288--303},
  numpages     = {16},
  year         = {2024},
  month        = {3},
  isbn         = {9798400705083},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  location     = {Greenville, SC, USA},
  keywords     = {feature-rich software, help-seeking, large language models, prompt-based interactions},
  doi          = {10.1145/3640543.3645200},
  url          = {https://doi.org/10.1145/3640543.3645200},
  environments = {text},
  agents       = {prompting_and_in_context_learning, two_agents},
  evaluation   = {qualitative, human},
  other        = {human_agent},
}

@misc{ha2024clochat,
  author        = {Juhye Ha and Hyeon Jeon and DaEun Han and Jinwook Seo and Changhoon Oh},
  title         = {CloChat: Understanding How People Customize, Interact, and Experience Personas in Large Language Models},
  year          = {2024},
  month         = {2},
  eprint        = {2402.15265},
  archivePrefix = {arXiv},
  primaryClass  = {cs.HC},
  url           = {https://arxiv.org/abs/2402.15265},
  environments  = {text},
  agents        = {prompting_and_in_context_learning, two_agents},
  evaluation    = {qualitative, human},
  other         = {human_agent},
}

@article{lee2024influence,
  author       = {Jieon Lee and Daeho Lee and Jae-gil Lee},
  title        = {Influence of Rapport and Social Presence with an AI Psychotherapy Chatbot on Users’ Self-Disclosure},
  journal      = {International Journal of Human–Computer Interaction},
  volume       = {40},
  number       = {7},
  pages        = {1620--1631},
  year         = {2022},
  month        = {11},
  publisher    = {Taylor \& Francis},
  doi          = {10.1080/10447318.2022.2146227},
  url          = {https://doi.org/10.1080/10447318.2022.2146227},
  environments = {text},
  agents       = {two_agents},
  evaluation   = {human},
  other        = {human_agent, health},
}

@article{ekbatani2023someone,
  author       = {Elisa Konya-Baumbach and Miriam Biller and Sergej {von Janda}},
  title        = {Someone out there? A study on the social presence of anthropomorphized chatbots},
  journal      = {Computers in Human Behavior},
  volume       = {139},
  pages        = {107513},
  year         = {2023},
  month        = {2},
  issn         = {0747-5632},
  doi          = {10.1016/j.chb.2022.107513},
  url          = {https://www.sciencedirect.com/science/article/pii/S0747563222003338},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human},
  other        = {n/a},
}

#### Human-Embodied Agent Interaction
@article{interactions/embodied,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}


@inproceedings{puig2023nopa,
  author       = {Puig, Xavier and Shu, Tianmin and Tenenbaum, Joshua B and Torralba, Antonio},
  title        = {Nopa: Neurally-guided online probabilistic assistance for building socially intelligent home assistants},
  booktitle    = {2023 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {7628--7634},
  year         = {2023},
  month        = {1},
  organization = {IEEE},
  url          = {https://ieeexplore.ieee.org/document/10161352},
  environments = {collaboration, embodied},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{puig2021watchandhelp,
  author       = {Xavier Puig and Tianmin Shu and Shuang Li and Zilin Wang and Yuan-Hong Liao and Joshua B. Tenenbaum and Sanja Fidler and Antonio Torralba},
  title        = {Watch-And-Help: A Challenge for Social Perception and Human-{AI} Collaboration},
  booktitle    = {International Conference on Learning Representations},
  year         = {2021},
  month        = {1},
  url          = {https://openreview.net/forum?id=w_7JMpGZRh0},
  environments = {collaboration, embodied},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}


@article{carroll2019utility,
  author       = {Carroll, Micah and Shah, Rohin and Ho, Mark K and Griffiths, Tom and Seshia, Sanjit and Abbeel, Pieter and Dragan, Anca},
  title        = {On the utility of learning about humans for human-ai coordination},
  journal      = {Advances in neural information processing systems},
  volume       = {32},
  year         = {2019},
  month        = {10},
  url          = {https://proceedings.neurips.cc/paper_files/paper/2019/file/f5b1b89d98b7286673128a5fb112cb9a-Paper.pdf},
  environments = {collaboration, embodied},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{nalepka2021interaction,
  author       = {Nalepka, Patrick and Gregory-Dunsmore, Jordan P and Simpson, James and Patil, Gaurav and Richardson, Michael J},
  title        = {Interaction flexibility in artificial agents teaming with humans},
  booktitle    = {Proceedings of the Annual Meeting of the Cognitive Science Society},
  volume       = {43},
  number       = {43},
  year         = {2021},
  month        = {5},
  url          = {https://escholarship.org/uc/item/9ks6n70q},
  environments = {collaboration, embodied},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{liu2023llm,
  author       = {Liu, Jijia and Yu, Chao and Gao, Jiaxuan and Xie, Yuqing and Liao, Qingmin and Wu, Yi and Wang, Yu},
  title        = {Llm-powered hierarchical language agent for real-time human-ai coordination},
  journal      = {arXiv preprint arXiv:2312.15224},
  year         = {2023},
  month        = {12},
  url          = {https://arxiv.org/abs/2312.15224},
  environments = {collaboration, embodied},
  agents       = {reinforcement_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{szot2023adaptive,
  author       = {Szot, Andrew and Jain, Unnat and Batra, Dhruv and Kira, Zsolt and Desai, Ruta and Rai, Akshara},
  title        = {Adaptive coordination in social embodied rearrangement},
  booktitle    = {International Conference on Machine Learning},
  pages        = {33365--33380},
  year         = {2023},
  month        = {5},
  organization = {PMLR},
  url          = {https://proceedings.mlr.press/v202/szot23a/szot23a.pdf},
  environments = {collaboration, embodied},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{park2023generative,
  author       = {Park, Joon Sung and O'Brien, Joseph and Cai, Carrie Jun and Morris, Meredith Ringel and Liang, Percy and Bernstein, Michael S},
  title        = {Generative agents: Interactive simulacra of human behavior},
  booktitle    = {Proceedings of the 36th Annual ACM Symposium on User Interface Software and Technology},
  pages        = {1--22},
  year         = {2023},
  month        = {4},
  url          = {https://dl.acm.org/doi/abs/10.1145/3586183.3606763},
  environments = {mixed_objectives, embodied},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{sarkar2023diverse,
  author       = {Sarkar, Bidipta and Shih, Andy and Sadigh, Dorsa},
  title        = {Diverse Conventions for Human-AI Collaboration},
  journal      = {Advances in Neural Information Processing Systems},
  volume       = {36},
  year         = {2023},
  month        = {12},
  url          = {https://proceedings.neurips.cc/paper_files/paper/2023/file/4818263715b25dc137d393af8af6d2fc-Paper-Conference.pdf},
  environments = {collaboration, embodied},
  agents       = {reinforcement_learning, two_agents, agent_teams},
  evaluation   = {rule_based},
  other        = {n/a},
}


#### Human-Robot Interaction
@article{interactions/robot,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@article{casper2023open,
  author       = {Casper, Stephen and Davies, Xander and Shi, Claudia and Gilbert, Thomas Krendl and Scheurer, J{\'e}r{\'e}my and Rando, Javier and Freedman, Rachel and Korbak, Tomasz and Lindner, David and Freire, Pedro and others},
  title        = {Open problems and fundamental limitations of reinforcement learning from human feedback},
  journal      = {arXiv preprint arXiv:2307.15217},
  year         = {2023},
  month        = {7},
  url          = {https://arxiv.org/abs/2307.15217},
  environments = {embodied, robotics},
  agents       = {finetuning, reinforcement_learning, two_agents},
  evaluation   = {human, rule_based, model_based},
  other        = {n/a},
}

@inproceedings{cui2023no,
  author       = {Cui, Yuchen and Karamcheti, Siddharth and Palleti, Raj and Shivakumar, Nidhya and Liang, Percy and Sadigh, Dorsa},
  title        = {No, to the right: Online language corrections for robotic manipulation via shared autonomy},
  booktitle    = {Proceedings of the 2023 ACM/IEEE International Conference on Human-Robot Interaction},
  pages        = {93--101},
  year         = {2023},
  month        = {3},
  url          = {https://arxiv.org/abs/2301.02555},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@inproceedings{mahadevan2024generative,
  author       = {Mahadevan, Karthik and Chien, Jonathan and Brown, Noah and Xu, Zhuo and Parada, Carolina and Xia, Fei and Zeng, Andy and Takayama, Leila and Sadigh, Dorsa},
  title        = {Generative expressive robot behaviors using large language models},
  booktitle    = {Proceedings of the 2024 ACM/IEEE International Conference on Human-Robot Interaction},
  pages        = {482--491},
  year         = {2024},
  month        = {3},
  url          = {https://arxiv.org/abs/2401.14673},
  environments = {robotics},
  agents       = {prompting_and_in_context_learning, two_agents},
  evaluation   = {human},
  other        = {n/a},
}

@inproceedings{lin2023gesture,
  author       = {Lin, Li-Heng and Cui, Yuchen and Hao, Yilun and Xia, Fei and Sadigh, Dorsa},
  title        = {Gesture-informed robot assistance via foundation models},
  booktitle    = {7th Annual Conference on Robot Learning},
  year         = {2023},
  month        = {8},
  url          = {https://arxiv.org/abs/2309.02721},
  environments = {robotics},
  agents       = {prompting_and_in_context_learning, two_agents},
  evaluation   = {human, rule_based},
  other        = {n/a},
}

@article{ren2023robots,
  author       = {Ren, Allen Z and Dixit, Anushri and Bodrova, Alexandra and Singh, Sumeet and Tu, Stephen and Brown, Noah and Xu, Peng and Takayama, Leila and Xia, Fei and Varley, Jake and others},
  title        = {Robots that ask for help: Uncertainty alignment for large language model planners},
  journal      = {arXiv preprint arXiv:2307.01928},
  year         = {2023},
  month        = {7},
  url          = {https://arxiv.org/abs/2307.01928},
  environments = {embodied, robotics},
  agents       = {prompting_and_in_context_learning, two_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{yu2023language,
  author       = {Yu, Wenhao and Gileadi, Nimrod and Fu, Chuyuan and Kirmani, Sean and Lee, Kuang-Huei and Arenas, Montse Gonzalez and Chiang, Hao-Tien Lewis and Erez, Tom and Hasenclever, Leonard and Humplik, Jan and others},
  title        = {Language to rewards for robotic skill synthesis},
  journal      = {arXiv preprint arXiv:2306.08647},
  year         = {2023},
  month        = {6},
  url          = {https://arxiv.org/abs/2306.08647},
  environments = {embodied, robotics},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {qualitative},
  other        = {n/a},
}

@article{ma2023eureka,
  author       = {Ma, Yecheng Jason and Liang, William and Wang, Guanzhi and Huang, De-An and Bastani, Osbert and Jayaraman, Dinesh and Zhu, Yuke and Fan, Linxi and Anandkumar, Anima},
  title        = {Eureka: Human-level reward design via coding large language models},
  journal      = {arXiv preprint arXiv:2310.12931},
  year         = {2023},
  month        = {10},
  url          = {https://arxiv.org/abs/2310.12931},
  environments = {embodied},
  agents       = {prompting_and_in_context_learning, reinforcement_learning},
  evaluation   = {human, rule_based},
  other        = {n/a},
}

@inproceedings{shaikewitz2023inmouth,
  author       = {Shaikewitz, Lorenzo and Wu, Yilin and Belkhale, Suneel and Grannen, Jennifer and Sundaresan, Priya and Sadigh, Dorsa},
  title        = {In-Mouth Robotic Bite Transfer with Visual and Haptic Sensing},
  booktitle    = {International Conference on Robotics and Automation (ICRA)},
  year         = {2023},
  month        = {3},
  url          = {https://arxiv.org/abs/2211.12705},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {human, rule_based},
  other        = {n/a},
}

@inproceedings{hejna2023few,
  author       = {Hejna III, Donald Joseph and Sadigh, Dorsa},
  title        = {Few-shot preference learning for human-in-the-loop rl},
  booktitle    = {Conference on Robot Learning},
  pages        = {2014--2025},
  year         = {2023},
  month        = {3},
  organization = {PMLR},
  url          = {https://arxiv.org/abs/2212.03363},
  environments = {embodied, robotics},
  agents       = {finetuning, reinforcement_learning, two_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@article{kress2021formalizing,
  author       = {Kress-Gazit, Hadas and Eder, Kerstin and Hoffman, Guy and Admoni, Henny and Argall, Brenna and Ehlers, R{\"u}diger and Heckman, Christoffer and Jansen, Nils and Knepper, Ross and K{\v{r}}et{\'\i}nsk{\`y}, Jan and others},
  title        = {Formalizing and guaranteeing human-robot interaction},
  journal      = {Communications of the ACM},
  volume       = {64},
  number       = {9},
  pages        = {78--84},
  year         = {2021},
  month        = {8},
  url          = {https://arxiv.org/abs/2006.16732},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {n/a},
  other        = {n/a},
}

@article{tian2021taxonomy,
  author       = {Tian, Leimin and Oviatt, Sharon},
  title        = {A taxonomy of social errors in human-robot interaction},
  journal      = {ACM Transactions on Human-Robot Interaction (THRI)},
  volume       = {10},
  number       = {2},
  pages        = {1--32},
  year         = {2021},
  month        = {1},
  publisher    = {ACM New York, NY, USA},
  url          = {https://dl.acm.org/doi/abs/10.1145/3439720},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@article{barchard2020measuring,
  author       = {Barchard, Kimberly A and Lapping-Carr, Leiszle and Westfall, R Shane and Fink-Armold, Andrea and Banisetty, Santosh Balajee and Feil-Seifer, David},
  title        = {Measuring the perceived social intelligence of robots},
  journal      = {ACM Transactions on Human-Robot Interaction (THRI)},
  volume       = {9},
  number       = {4},
  pages        = {1--29},
  year         = {2020},
  month        = {1},
  publisher    = {ACM New York, NY, USA},
  url          = {https://dl.acm.org/doi/abs/10.1145/3415139},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@article{wiltshire2017enabling,
  author       = {Wiltshire, Travis J and Warta, Samantha F and Barber, Daniel and Fiore, Stephen M},
  title        = {Enabling robotic social intelligence by engineering human social-cognitive mechanisms},
  journal      = {Cognitive Systems Research},
  volume       = {43},
  pages        = {190--207},
  year         = {2017},
  month        = {1},
  publisher    = {Elsevier},
  url          = {https://www.sciencedirect.com/science/article/abs/pii/S1389041716300493},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@article{ligthart2021core,
  author       = {Ligthart, Mike EU and Neerincx, Mark A and Hindriks, Koen V},
  title        = {Core elements of social interaction for constructive human-robot interaction},
  journal      = {arXiv preprint arXiv:2110.04054},
  year         = {2021},
  month        = {10},
  url          = {https://arxiv.org/abs/2110.04054},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@article{skantze2021turn,
  author       = {Skantze, Gabriel},
  title        = {Turn-taking in conversational systems and human-robot interaction: a review},
  journal      = {Computer Speech \& Language},
  volume       = {67},
  pages        = {101178},
  year         = {2021},
  month        = {1},
  publisher    = {Elsevier},
  url          = {https://www.sciencedirect.com/science/article/pii/S088523082030111X},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@inproceedings{breazeal1998motivational,
  author       = {Breazeal, Cynthia},
  title        = {A motivational system for regulating human-robot interaction},
  booktitle    = {AAAI},
  pages        = {54--61},
  year         = {1998},
  month        = {1},
  url          = {https://cdn.aaai.org/AAAI/1998/AAAI98-008.pdf},
  environments = {robotics, collaboration},
  agents       = {two_agents},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@book{breazeal2004designing,
  author       = {Breazeal, Cynthia},
  title        = {Designing sociable robots},
  booktitle    = {Designing sociable robots},
  publisher    = {MIT Press},
  year         = {2004},
  month        = {1},
  url          = {https://books.google.com/books?hl=en&lr=&id=402dquhxSTQC&oi=fnd&pg=PR15&dq=Designing+sociable+robots&ots=oCRnuTx7Fk&sig=K5VP1UsSlq-8LeKIvW7dfy4qils#v=onepage&q=Designing%20sociable%20robots&f=false},
  environments = {robotics},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {human_agent},
}

@article{breazeal2004social,
  author       = {Breazeal, Cynthia},
  title        = {Social interactions in HRI: the robot view},
  journal      = {IEEE transactions on systems, man, and cybernetics, part C (applications and reviews)},
  volume       = {34},
  number       = {2},
  pages        = {181--186},
  year         = {2004},
  month        = {5},
  publisher    = {IEEE},
  url          = {https://ieeexplore.ieee.org/abstract/document/1291665},
  environments = {robotics},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {human_agent},
}

@inproceedings{feil2005defining,
  author       = {Feil-Seifer, David and Matari{\'c}, Maja J},
  title        = {Defining socially assistive robotics},
  booktitle    = {9th International Conference on Rehabilitation Robotics, 2005. ICORR 2005.},
  pages        = {465--468},
  year         = {2005},
  month        = {6},
  organization = {IEEE},
  url          = {https://ieeexplore.ieee.org/abstract/document/1501143},
  environments = {robotics},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {human_agent, health, education},
}

@article{jackson2021theory,
  author       = {Jackson, Ryan Blake and Williams, Tom},
  title        = {A theory of social agency for human-robot interaction},
  journal      = {Frontiers in Robotics and AI},
  volume       = {8},
  pages        = {687726},
  year         = {2021},
  month        = {8},
  publisher    = {Frontiers Media SA},
  url          = {https://www.frontiersin.org/articles/10.3389/frobt.2021.687726/full},
  environments = {robotics},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {human_agent},
}

@inproceedings{mathur2021modeling,
  author       = {Mathur, Leena and Spitale, Micol and Xi, Hao and Li, Jieyun and Matari{\'c}, Maja J},
  title        = {Modeling user empathy elicited by a robot storyteller},
  booktitle    = {2021 9th International Conference on Affective Computing and Intelligent Interaction (ACII)},
  pages        = {1--8},
  year         = {2021},
  month        = {9},
  organization = {IEEE},
  url          = {https://ieeexplore.ieee.org/abstract/document/9597416},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {model_based},
  other        = {human_agent},
}

@inproceedings{breazeal2005effects,
  author       = {Breazeal, Cynthia and Kidd, Cory D and Thomaz, Andrea Lockerd and Hoffman, Guy and Berlin, Matt},
  title        = {Effects of nonverbal communication on efficiency and robustness in human-robot teamwork},
  booktitle    = {2005 IEEE/RSJ international conference on intelligent robots and systems},
  pages        = {708--713},
  year         = {2005},
  month        = {8},
  organization = {IEEE},
  url          = {https://ieeexplore.ieee.org/abstract/document/1545011},
  environments = {robotics, collaboration, implicit_objectives},
  agents       = {two_agents},
  evaluation   = {qualitative, human, rule_based},
  other        = {human_agent},
}


#### Human-Human Interaction
@article{interactions/human,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@article{shaikh2023rehearsal,
  author       = {Shaikh, Omar and Chai, Valentino and Gelfand, Michele J and Yang, Diyi and Bernstein, Michael S},
  title        = {Rehearsal: Simulating conflict to teach conflict resolution},
  journal      = {arXiv preprint arXiv:2309.12309},
  year         = {2023},
  month        = {9},
  url          = {https://arxiv.org/pdf/2309.12309.pdf},
  environments = {implicit_objectives, text},
  agents       = {prompting_and_in_context_learning, two_agents},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@article{STERGIOU2019102799,
  author       = {Alexandros Stergiou and Ronald Poppe},
  title        = {Analyzing human–human interactions: A survey},
  journal      = {Computer Vision and Image Understanding},
  volume       = {188},
  pages        = {102799},
  year         = {2019},
  month        = {11},
  issn         = {1077-3142},
  doi          = {10.1016/j.cviu.2019.102799},
  url          = {https://www.sciencedirect.com/science/article/pii/S1077314219301158},
  keywords     = {Human-human interaction, Human interaction recognition, Human activity},
  abstract     = {Many videos depict people, and it is their interactions that inform us of their activities, relation to one another and the cultural and social setting. With advances in human action recognition, researchers have begun to address the automated recognition of these human–human interactions from video. The main challenges stem from dealing with the considerable variation in recording setting, the appearance of the people depicted and the coordinated performance of their interaction. This survey provides a summary of these challenges and datasets to address these, followed by an in-depth discussion of relevant vision-based recognition and detection methods. We focus on recent, promising work based on deep learning and convolutional neural networks (CNNs). Finally, we outline directions to overcome the limitations of the current state-of-the-art to analyze and, eventually, understand social human actions.},
  environments = {collaboration, competition, mixed_objectives, implicit_objectives, embodied},
  agents       = {two_agents, more_than_three_agents, agent_teams},
  evaluation   = {human, rule_based, model_based},
  other        = {human_agent},
}

@inproceedings{Ego4D2022CVPR,
  title        = {Ego4D: Around the {W}orld in 3,000 {H}ours of {E}gocentric {V}ideo},
  author       = {Grauman, Kristen and Westbury, Andrew and Byrne, Eugene and Chavis, Zachary and Furnari, Antonino and Girdhar, Rohit and Hamburger, Jackson and Jiang, Hao and Liu, Miao and Liu, Xingyu and Martin, Miguel and Nagarajan, Tushar and Radosavovic, Ilija and Ramakrishnan, Santhosh Kumar and Ryan, Fiona and Sharma, Jayant and Wray, Michael and Xu, Mengmeng and Xu, Eric Zhongcong and Zhao, Chen and Bansal, Siddhant and Batra, Dhruv and Cartillier, Vincent and Crane, Sean and Do, Tien and Doulaty, Morrie and Erapalli, Akshay and Feichtenhofer, Christoph and Fragomeni, Adriano and Fu, Qichen and Fuegen, Christian and Gebreselasie, Abrham and Gonzalez, Cristina and Hillis, James and Huang, Xuhua and Huang, Yifei and Jia, Wenqi and Khoo, Weslie and Kolar, Jachym and Kottur, Satwik and Kumar, Anurag and Landini, Federico and Li, Chao and Li, Yanghao and Li, Zhenqiang and Mangalam, Karttikeya and Modhugu, Raghava and Munro, Jonathan and Murrell, Tullie and Nishiyasu, Takumi and Price, Will and Puentes, Paola Ruiz and Ramazanova, Merey and Sari, Leda and Somasundaram, Kiran and Southerland, Audrey and Sugano, Yusuke and Tao, Ruijie and Vo, Minh and Wang, Yuchen and Wu, Xindi and Yagi, Takuma and Zhu, Yunyi and Arbelaez, Pablo and Crandall, David and Damen, Dima and Farinella, Giovanni Maria and Ghanem, Bernard and Ithapu, Vamsi Krishna and Jawahar, C. V. and Joo, Hanbyul and Kitani, Kris and Li, Haizhou and Newcombe, Richard and Oliva, Aude and Park, Hyun Soo and Rehg, James M. and Sato, Yoichi and Shi, Jianbo and Shou, Mike Zheng and Torralba, Antonio and Torresani, Lorenzo and Yan, Mingfei and Malik, Jitendra},
  booktitle    = {IEEE/CVF Computer Vision and Pattern Recognition (CVPR)},
  year         = {2022},
  month        = {6},
  url          = {https://ego4d-data.org/},
  environments = {collaboration, competition, mixed_objectives, implicit_objectives, embodied},
  agents       = {two_agents, more_than_three_agents, agent_teams},
  evaluation   = {human, rule_based, model_based},
  other        = {human_agent},
}

@incollection{dai2021agent,
  author       = {Dai, Tinglong and Sycara, Katia and Zheng, Ronghuo},
  title        = {Agent reasoning in AI-powered negotiation},
  booktitle    = {Handbook of Group Decision and Negotiation},
  pages        = {1187--1211},
  year         = {2021},
  month        = {12},
  publisher    = {Springer},
  url          = {https://link.springer.com/referenceworkentry/10.1007/978-3-030-49629-6_26},
  environments = {mixed_objectives, text},
  agents       = {two_agents},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@misc{NegotiAge,
  author       = {Lee Lindquist},
  title        = {NegotiAge},
  journal      = {NegotiAge},
  year         = {2024},
  month        = {4},
  url          = {https://www.negotiage.com/},
  environments = {implicit_objectives, virtual},
  agents       = {two_agents},
  evaluation   = {n/a},
  other        = {human_agent, health},
}

@incollection{zeleznikow2021negotiation,
  author       = {Zeleznikow, John},
  title        = {Negotiation, Online Dispute Resolution, and Artificial Intelligence},
  booktitle    = {Handbook of Group Decision and Negotiation},
  pages        = {1125--1147},
  year         = {2021},
  month        = {12},
  publisher    = {Springer},
  url          = {https://link.springer.com/referenceworkentry/10.1007/978-3-030-49629-6_38},
  environments = {mixed_objectives, text},
  agents       = {two_agents},
  evaluation   = {rule_based},
  other        = {human_agent},
}

@inproceedings{Seo2021towards,
  author       = {Seo, Sangwon and Kennedy-Metz, Lauren R. and Zenati, Marco A. and Shah, Julie A. and Dias, Roger D. and Unhelkar, Vaibhav V.},
  title        = {Towards an AI Coach to Infer Team Mental Model Alignment in Healthcare},
  booktitle    = {2021 IEEE Conference on Cognitive and Computational Aspects of Situation Management (CogSIMA)},
  pages        = {39--44},
  year         = {2021},
  month        = {5},
  keywords     = {Conferences;Computational modeling;Surgery;Medical services;Cognition;Cognitive science;Teamwork;teamwork;surgical data science;cardiac surgery;Bayesian inference;patient safety;artificial intelligence},
  doi          = {10.1109/CogSIMA51574.2021.9475925},
  url          = {https://ieeexplore.ieee.org/document/9475925},
  environments = {collaboration, mixed_objectives, implicit_objectives, text},
  agents       = {two_agents, more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {human_agent, health},
}

@article{luo2021artificial,
  author       = {Luo, Xueming and Qin, Marco Shaojun and Fang, Zheng and Qu, Zhe},
  title        = {Artificial intelligence coaches for sales agents: Caveats and solutions},
  journal      = {Journal of Marketing},
  volume       = {85},
  number       = {2},
  pages        = {14--32},
  year         = {2021},
  month        = {10},
  publisher    = {SAGE Publications Sage CA: Los Angeles, CA},
  url          = {https://journals.sagepub.com/doi/full/10.1177/0022242920956676},
  environments = {mixed_objectives, text},
  agents       = {two_agents},
  evaluation   = {rule_based, human},
  other        = {human_agent},
}

@article{gabrielli2020chatbot,
  author       = {Gabrielli, Silvia and Rizzi, Silvia and Carbone, Sara and Donisi, Valeria and others},
  title        = {A chatbot-based coaching intervention for adolescents to promote life skills: pilot study},
  journal      = {JMIR human factors},
  volume       = {7},
  number       = {1},
  pages        = {e16762},
  year         = {2020},
  month        = {10},
  publisher    = {JMIR Publications Inc., Toronto, Canada},
  url          = {https://humanfactors.jmir.org/2020/1/e16762},
  environments = {text, implicit_objectives},
  agents       = {two_agents, more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {human_agent, health},
}

@article{porayska2018blending,
  author       = {Porayska-Pomsta, Ka{\'s}ka and Alcorn, Alyssa M and Avramides, Katerina and Beale, Sandra and Bernardini, Sara and Foster, Mary Ellen and Frauenberger, Christopher and Good, Judith and Guldberg, Karen and Keay-Bright, Wendy and others},
  title        = {Blending human and artificial intelligence to support autistic children’s social communication skills},
  journal      = {ACM Transactions on Computer-Human Interaction (TOCHI)},
  volume       = {25},
  number       = {6},
  pages        = {1--35},
  year         = {2018},
  month        = {12},
  publisher    = {ACM New York, NY, USA},
  url          = {https://dl.acm.org/doi/abs/10.1145/3271484},
  environments = {virtual},
  agents       = {two_agents},
  evaluation   = {human, rule_based},
  other        = {human_agent, health},
}

@article{nyatsanga2023comprehensive,
  author       = {Nyatsanga, Simbarashe and Kucherenko, Taras and Ahuja, Chaitanya and Henter, Gustav Eje and Neff, Michael},
  title        = {A Comprehensive Review of Data-Driven Co-Speech Gesture Generation},
  journal      = {Computer Graphics Forum},
  volume       = {42},
  number       = {2},
  pages        = {569--596},
  year         = {2023},
  month        = {1},
  publisher    = {Wiley Online Library},
  url          = {https://arxiv.org/abs/2301.05339},
  environments = {virtual, embodied},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {n/a},
}

@article{argyle_leveraging_2023,
  author       = {Argyle, Lisa P. and Bail, Christopher A. and Busby, Ethan C. and Gubler, Joshua R. and Howe, Thomas and Rytting, Christopher and Sorensen, Taylor and Wingate, David},
  title        = {Leveraging {AI} for democratic discourse: {Chat} interventions can improve online political conversations at scale},
  shorttitle   = {Leveraging {AI} for democratic discourse},
  journal      = {Proceedings of the National Academy of Sciences},
  volume       = {120},
  number       = {41},
  pages        = {e2311627120},
  year         = {2023},
  month        = {10},
  doi          = {10.1073/pnas.2311627120},
  url          = {https://www.pnas.org/doi/abs/10.1073/pnas.2311627120},
  environments = {virtual},
  agents       = {agent_teams, two_agents},
  evaluation   = {human, rule_based, model_based},
  other        = {human_agent},
}

@misc{yeo_help_2024,
  author        = {Yeo, Shun Yi and Lim, Gionnieve and Gao, Jie and Zhang, Weiyu and Perrault, Simon Tangi},
  title         = {Help {Me} {Reflect}: {Leveraging} {Self}-{Reflection} {Interface} {Nudges} to {Enhance} {Deliberativeness} on {Online} {Deliberation} {Platforms}},
  shorttitle    = {Help {Me} {Reflect}},
  publisher     = {arXiv},
  year          = {2024},
  month         = {1},
  doi           = {10.48550/arXiv.2401.10820},
  url           = {http://arxiv.org/abs/2401.10820},
  eprint        = {2401.10820},
  archivePrefix = {arXiv},
  environments  = {virtual},
  agents        = {two_agents, more_than_three_agents, agent_teams},
  evaluation    = {human, rule_based},
  other         = {human_agent},
}

@misc{lin_imbue_2024,
  author        = {Lin, Inna Wanyin and Sharma, Ashish and Rytting, Christopher Michael and Miner, Adam S. and Suh, Jina and Althoff, Tim},
  title         = {{IMBUE}: {Improving} {Interpersonal} {Effectiveness} through {Simulation} and {Just}-in-time {Feedback} with {Human}-{Language} {Model} {Interaction}},
  shorttitle    = {{IMBUE}},
  publisher     = {arXiv},
  year          = {2024},
  month         = {2},
  doi           = {10.48550/arXiv.2402.12556},
  url           = {http://arxiv.org/abs/2402.12556},
  eprint        = {2402.12556},
  archivePrefix = {arXiv},
  environments  = {virtual},
  agents        = {two_agents},
  evaluation    = {human, rule_based, model_based},
  other         = {human_agent},
}

@misc{fu_text_2024,
  author        = {Fu, Yue and Foell, Sami and Xu, Xuhai and Hiniker, Alexis},
  title         = {From {Text} to {Self}: {Users}' {Perceptions} of {Potential} of {AI} on {Interpersonal} {Communication} and {Self}},
  shorttitle    = {From {Text} to {Self}},
  year          = {2024},
  month         = {3},
  doi           = {10.1145/3613904.3641955},
  url           = {http://arxiv.org/abs/2310.03976},
  eprint        = {2310.03976},
  archivePrefix = {arXiv},
  environments  = {virtual},
  agents        = {two_agents, agent_teams},
  evaluation    = {qualitative, human},
  other         = {human_agent},
}

@article{sharma_humanai_2023,
  author       = {Sharma, Ashish and Lin, Inna W. and Miner, Adam S. and Atkins, David C. and Althoff, Tim},
  title        = {Human–{AI} collaboration enables more empathic conversations in text-based peer-to-peer mental health support},
  journal      = {Nature Machine Intelligence},
  volume       = {5},
  number       = {1},
  pages        = {46--57},
  year         = {2023},
  month        = {1},
  issn         = {2522-5839},
  doi          = {10.1038/s42256-022-00593-2},
  url          = {https://www.nature.com/articles/s42256-022-00593-2},
  language     = {en},
  copyright    = {2023 The Author(s), under exclusive licence to Springer Nature Limited},
  note         = {Publisher: Nature Publishing Group},
  environments = {virtual, collaboration},
  agents       = {two_agents, agent_teams},
  evaluation   = {human, model_based},
  other        = {human_agent},
}

@article{mieczkowski_ai-mediated_2021,
  author       = {Mieczkowski, Hannah and Hancock, Jeffrey T. and Naaman, Mor and Jung, Malte and Hohenstein, Jess},
  title        = {{AI}-{Mediated} {Communication}: {Language} {Use} and {Interpersonal} {Effects} in a {Referential} {Communication} {Task}},
  shorttitle   = {{AI}-{Mediated} {Communication}},
  journal      = {Proceedings of the ACM on Human-Computer Interaction},
  volume       = {5},
  number       = {CSCW1},
  pages        = {17:1--17:14},
  year         = {2021},
  month        = {4},
  doi          = {10.1145/3449091},
  url          = {https://dl.acm.org/doi/10.1145/3449091},
  urldate      = {2024-05-01},
  environments = {virtual},
  agents       = {two_agents, agent_teams},
  evaluation   = {human, qualitative},
  other        = {human_agent},
}

@article{chang_thread_2022,
  author       = {Chang, Jonathan P. and Schluger, Charlotte and Danescu-Niculescu-Mizil, Cristian},
  title        = {Thread {With} {Caution}: {Proactively} {Helping} {Users} {Assess} and {Deescalate} {Tension} in {Their} {Online} {Discussions}},
  shorttitle   = {Thread {With} {Caution}},
  journal      = {Proceedings of the ACM on Human-Computer Interaction},
  volume       = {6},
  number       = {CSCW2},
  pages        = {545:1--545:37},
  year         = {2022},
  month        = {11},
  doi          = {10.1145/3555603},
  url          = {https://dl.acm.org/doi/10.1145/3555603},
  urldate      = {2024-05-01},
  environments = {virtual},
  agents       = {two_agents, more_than_three_agents, agent_teams},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@article{lee2024modeling,
  author       = {Lee, Sangmin and Lai, Bolin and Ryan, Fiona and Boote, Bikram and Rehg, James M},
  title        = {Modeling Multimodal Social Interactions: New Challenges and Baselines with Densely Aligned Representations},
  journal      = {arXiv preprint arXiv:2403.02090},
  year         = {2024},
  month        = {4},
  url          = {https://arxiv.org/abs/2403.02090},
  environments = {virtual},
  agents       = {two_agents, more_than_three_agents, agent_teams},
  evaluation   = {model_based},
  other        = {n/a},
}


### Challenges

#### Theory of Mind

@misc{sanders2024tvtrees,
  author        = {Kate Sanders and Nathaniel Weir and Benjamin Van Durme},
  title         = {TV-TREES: Multimodal Entailment Trees for Neuro-Symbolic Video Reasoning},
  year          = {2024},
  month         = {2},
  url           = {https://api.semanticscholar.org/CorpusID:268091324},
  eprint        = {2402.19467},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  environments  = {text, virtual},
  agents        = {pretraining, finetuning},
  evaluation    = {model_based},
  other         = {n/a},
}

#### Social Learning

#### Simultaneous Interaction

### Applications

#### Health
@article{applications/health,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@misc{mukherjee2024polaris,
  author        = {Subhabrata Mukherjee and Paul Gamble and Markel Sanz Ausin and Neel Kant and Kriti Aggarwal and Neha Manjunath and Debajyoti Datta and Zhengliang Liu and Jiayuan Ding and Sophia Busacca and Cezanne Bianco and Swapnil Sharma and Rae Lasko and Michelle Voisard and Sanchay Harneja and Darya Filippova and Gerry Meixiong and Kevin Cha and Amir Youssefi and Meyhaa Buvanesh and Howard Weingram and Sebastian Bierman-Lytle and Harpreet Singh Mangat and Kim Parikh and Saad Godil and Alex Miller},
  title         = {Polaris: A Safety-focused LLM Constellation Architecture for Healthcare},
  year          = {2024},
  month         = {3},
  eprint        = {2403.13313},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2403.13313},
  environments  = {mixed_objectives, virtual},
  agents        = {prompting_and_in_context_learning, finetuning, reinforcement_learning, agent_teams},
  evaluation    = {human, rule_based},
  other         = {human_agent, health},
}

@misc{ke2024enhancing,
  author        = {Yu He Ke and Rui Yang and Sui An Lie and Taylor Xin Yi Lim and Hairil Rizal Abdullah and Daniel Shu Wei Ting and Nan Liu},
  title         = {Enhancing Diagnostic Accuracy through Multi-Agent Conversations: Using Large Language Models to Mitigate Cognitive Bias},
  year          = {2024},
  month         = {1},
  eprint        = {2401.14589},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2401.14589},
  environments  = {mixed_objectives, text},
  agents        = {prompting_and_in_context_learning, agent_teams, agents_with_personas},
  evaluation    = {human},
  other         = {simulated_humans, health},
}

@misc{huang2024benchmarking,
  author        = {Hengguan Huang and Songtao Wang and Hongfu Liu and Hao Wang and Ye Wang},
  title         = {Benchmarking Large Language Models on Communicative Medical Coaching: a Novel System and Dataset},
  year          = {2024},
  month         = {2},
  eprint        = {2402.05547},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2402.05547},
  environments  = {mixed_objectives, text},
  agents        = {prompting_and_in_context_learning, finetuning, agent_teams},
  evaluation    = {human, rule_based},
  other         = {human_agent, health},
}

@misc{fan2024ai,
  author        = {Zhihao Fan and Jialong Tang and Wei Chen and Siyuan Wang and Zhongyu Wei and Jun Xi and Fei Huang and Jingren Zhou},
  title         = {AI Hospital: Interactive Evaluation and Collaboration of LLMs as Intern Doctors for Clinical Diagnosis},
  year          = {2024},
  month         = {2},
  eprint        = {2402.09742},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2402.09742},
  environments  = {collaboration, text},
  agents        = {prompting_and_in_context_learning, agent_teams, agents_with_personas},
  evaluation    = {human},
  other         = {simulated_humans, health},
}

@misc{lee2024cocoa,
  author        = {Suyeon Lee and Jieun Kang and Harim Kim and Kyoung-Mee Chung and Dongha Lee and Jinyoung Yeo},
  title         = {COCOA: CBT-based Conversational Counseling Agent using Memory Specialized in Cognitive Distortions and Dynamic Prompt},
  year          = {2024},
  month         = {2},
  eprint        = {2402.17546},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2402.17546},
  environments  = {text},
  agents        = {prompting_and_in_context_learning, two_agents, agents_with_memory},
  evaluation    = {model_based},
  other         = {health},
}

@misc{hsu2023helping,
  author        = {Shang-Ling Hsu and Raj Sanjay Shah and Prathik Senthil and Zahra Ashktorab and Casey Dugan and Werner Geyer and Diyi Yang},
  title         = {Helping the Helper: Supporting Peer Counselors via AI-Empowered Practice and Feedback},
  year          = {2023},
  month         = {5},
  eprint        = {2305.08982},
  archivePrefix = {arXiv},
  primaryClass  = {cs.HC},
  url           = {https://arxiv.org/abs/2305.08982},
  environments  = {text},
  agents        = {prompting_and_in_context_learning, more_than_three_agents},
  evaluation    = {human},
  other         = {human_agent, health},
}

@misc{qin2023read,
  author        = {Wei Qin and Zetong Chen and Lei Wang and Yunshi Lan and Weijieying Ren and Richang Hong},
  title         = {Read, Diagnose and Chat: Towards Explainable and Interactive LLMs-Augmented Depression Detection in Social Media},
  year          = {2023},
  month         = {5},
  eprint        = {2305.05138},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2305.05138},
  environments  = {mixed_objectives, text},
  agents        = {prompting_and_in_context_learning, two_agents},
  evaluation    = {n/a},
  other         = {human_agent, health},
}

 @article{görtz_2023,
  author       = {Görtz, Magdalena and Baumgärtner, Kilian and Schmid, Tamara and Muschko, Marc and Woessner, Philipp and Gerlach, Axel and Byczkowski, Michael and Sültmann, Holger and Duensing, Stefan and Hohenfellner, Markus},
  title        = {An artificial intelligence-based chatbot for prostate cancer education: Design and patient evaluation study},
  journal      = {Digital Health},
  volume       = {9},
  pages        = {20552076231173304},
  year         = {2023},
  month        = {05},
  doi          = {10.1177/20552076231173304},
  url          = {https://pubmed.ncbi.nlm.nih.gov/37152238/},
  environments = {text},
  agents       = {finetuning, two_agents},
  evaluation   = {qualitative},
  other        = {human_agent, health},
}

 @misc{abbasian2024conversational,
  author        = {Mahyar Abbasian and Iman Azimi and Amir M. Rahmani and Ramesh Jain},
  title         = {Conversational Health Agents: A Personalized LLM-Powered Agent Framework},
  year          = {2024},
  eprint        = {2310.02374},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2310.02374},
  environments  = {mixed_objectives, text},
  agents        = {prompting_and_in_context_learning, more_than_three_agents},
  evaluation    = {rule_based},
  other         = {human_agent, health},
}

@article{milne2020effectiveness,
  author       = {Milne-Ives, Madison and de Cock, Caroline and Lim, Ernest and Shehadeh, Melissa Harper and de Pennington, Nick and Mole, Guy and Normando, Eduardo and Meinert, Edward},
  title        = {The effectiveness of artificial intelligence conversational agents in health care: systematic review},
  journal      = {Journal of medical Internet research},
  volume       = {22},
  number       = {10},
  pages        = {e20346},
  year         = {2020},
  month        = {1},
  publisher    = {JMIR Publications Toronto, Canada},
  url          = {https://www.jmir.org/2020/10/e20346/PDF},
  environments = {text, virtual},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {health},
}

@incollection{bickmore2022health,
  author       = {Bickmore, Timothy},
  title        = {Health-related applications of socially interactive agents},
  booktitle    = {The Handbook on Socially Interactive Agents: 20 years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 2: Interactivity, Platforms, Application},
  pages        = {403--436},
  year         = {2022},
  month        = {1},
  url          = {https://dl.acm.org/doi/abs/10.1145/3563659.3563672},
  environments = {text, virtual},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {health},
}

@article{gillani2021intelligent,
  author       = {Gillani, Nazia and Arslan, Tughrul},
  title        = {Intelligent sensing technologies for the diagnosis, monitoring and therapy of {Alzheimer}'s disease: A systematic review},
  journal      = {Sensors},
  volume       = {21},
  number       = {12},
  pages        = {4249},
  year         = {2021},
  month        = {1},
  publisher    = {MDPI},
  url          = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8234801/},
  environments = {text, virtual},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {health},
}

@article{moor2023foundation,
  author       = {Moor, Michael and Banerjee, Oishi and Abad, Zahra Shakeri Hossein and Krumholz, Harlan M and Leskovec, Jure and Topol, Eric J and Rajpurkar, Pranav},
  title        = {Foundation models for generalist medical artificial intelligence},
  journal      = {Nature},
  volume       = {616},
  number       = {7956},
  pages        = {259--265},
  year         = {2023},
  month        = {1},
  publisher    = {Nature Publishing Group UK London},
  url          = {https://www.nature.com/articles/s41586-023-05881-4},
  environments = {text, virtual, embodied},
  agents       = {prompting_and_in_context_learning, finetuning, pretraining, two_agents},
  evaluation   = {qualitative, human},
  other        = {health},
}

@article{esmaeilzadeh2021patients,
  author       = {Esmaeilzadeh, Pouyan and Mirzaei, Tala and Dharanikota, Spurthy},
  title        = {Patients’ perceptions toward human--artificial intelligence interaction in health care: experimental study},
  journal      = {Journal of medical Internet research},
  volume       = {23},
  number       = {11},
  pages        = {e25856},
  year         = {2021},
  month        = {1},
  publisher    = {JMIR Publications Toronto, Canada},
  url          = {https://www.jmir.org/2021/11/e25856/},
  environments = {text, virtual, embodied},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {health},
}

@article{pee2019artificial,
  author       = {Pee, Loo G and Pan, Shan L and Cui, Lili},
  title        = {Artificial intelligence in healthcare robots: A social informatics study of knowledge embodiment},
  journal      = {Journal of the Association for Information Science and Technology},
  volume       = {70},
  number       = {4},
  pages        = {351--369},
  year         = {2019},
  month        = {1},
  publisher    = {Wiley Online Library},
  doi          = {10.1002/asi.24145},
  url          = {https://asistdl.onlinelibrary.wiley.com/doi/pdf/10.1002/asi.24145},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {health},
}

@inproceedings{abbasi2022can,
  author       = {Abbasi, Nida Itrat and Spitale, Micol and Anderson, Joanna and Ford, Tamsin and Jones, Peter B and Gunes, Hatice},
  title        = {Can robots help in the evaluation of mental wellbeing in children? an empirical study},
  booktitle    = {2022 31st IEEE international conference on robot and human interactive communication (RO-MAN)},
  pages        = {1459--1466},
  year         = {2022},
  month        = {8},
  organization = {IEEE},
  url          = {https://ieeexplore.ieee.org/abstract/document/9900843},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {qualitative, human},
  other        = {health},
}

@article{scassellati2012robots,
  author       = {Scassellati, Brian and Admoni, Henny and Matari{\'c}, Maja},
  title        = {Robots for use in autism research},
  journal      = {Annual review of biomedical engineering},
  volume       = {14},
  pages        = {275--294},
  year         = {2012},
  month        = {8},
  publisher    = {Annual Reviews},
  url          = {https://www.annualreviews.org/content/journals/10.1146/annurev-bioeng-071811-150036},
  environments = {robotics},
  agents       = {two_agents, more_than_three_agents, agent_teams},
  evaluation   = {n/a},
  other        = {health, human_agent},
}

@article{fasola2013socially,
  author       = {Fasola, Juan and Matari{\'c}, Maja J},
  title        = {A socially assistive robot exercise coach for the elderly},
  journal      = {Journal of Human-Robot Interaction},
  volume       = {2},
  number       = {2},
  pages        = {3--32},
  year         = {2013},
  month        = {6},
  publisher    = {Journal of Human-Robot Interaction Steering Committee},
  url          = {https://dl.acm.org/doi/abs/10.5898/JHRI.2.2.Fasola},
  environments = {robotics},
  agents       = {two_agents, more_than_three_agents, agent_teams},
  evaluation   = {human, qualitative, rule_based},
  other        = {health, human_agent},
}

@article{rudovic2018personalized,
  author       = {Rudovic, Ognjen and Lee, Jaeryoung and Dai, Miles and Schuller, Bj{\"o}rn and Picard, Rosalind W},
  title        = {Personalized machine learning for robot perception of affect and engagement in autism therapy},
  journal      = {Science Robotics},
  volume       = {3},
  number       = {19},
  pages        = {eaao6760},
  year         = {2018},
  month        = {6},
  publisher    = {American Association for the Advancement of Science},
  doi          = {10.1126/scirobotics.aao6760},
  url          = {https://www.science.org/doi/full/10.1126/scirobotics.aao6760},
  environments = {robotics},
  agents       = {two_agents},
  evaluation   = {model_based},
  other        = {health, human_agent},
}


#### Policy
@article{applications/policy,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@inproceedings{jarrett2023language,
  author       = {Jarrett, Daniel and Pislar, Miruna and Bakker, Michiel A and Tessler, Michael Henry and Koster, Raphael and Balaguer, Jan and Elie, Romuald and Summerfield, Christopher and Tacchetti, Andrea},
  title        = {Language agents as digital representatives in collective decision-making},
  booktitle    = {NeurIPS 2023 Foundation Models for Decision Making Workshop},
  year         = {2023},
  month        = {12},
  url          = {https://openreview.net/pdf?id=sv7KZcUqu1},
  environments = {text, mixed_objectives},
  agents       = {more_than_three_agents, agents_with_personas, finetuning},
  evaluation   = {model_based},
  other        = {simulated_humans, policy},
}

@inproceedings{10.1145/3526113.3545616,
  author       = {Park, Joon Sung and Popowski, Lindsay and Cai, Carrie and Morris, Meredith Ringel and Liang, Percy and Bernstein, Michael S.},
  title        = {Social Simulacra: Creating Populated Prototypes for Social Computing Systems},
  booktitle    = {Proceedings of the 35th Annual ACM Symposium on User Interface Software and Technology},
  series       = {UIST '22},
  location     = {Bend, OR, USA},
  articleno    = {74},
  numpages     = {18},
  year         = {2022},
  month        = {8},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  isbn         = {9781450393201},
  doi          = {10.1145/3526113.3545616},
  url          = {https://doi.org/10.1145/3526113.3545616},
  keywords     = {social computing, prototyping},
  environments = {text, implicit_objectives},
  agents       = {more_than_three_agents},
  evaluation   = {human},
  other        = {policy},
}

@misc{tjuatja2024llms,
  author        = {Lindia Tjuatja and Valerie Chen and Sherry Tongshuang Wu and Ameet Talwalkar and Graham Neubig},
  title         = {Do LLMs exhibit human-like response biases? A case study in survey design},
  year          = {2024},
  eprint        = {2311.04076},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2311.04076},
  environments  = {text},
  agents        = {prompting_and_in_context_learning},
  evaluation    = {human, model_based},
  other         = {policy},
}

@misc{wang2024large,
  author        = {Angelina Wang and Jamie Morgenstern and John P. Dickerson},
  title         = {Large language models cannot replace human participants because they cannot portray identity groups},
  year          = {2024},
  month         = {2},
  eprint        = {2402.01908},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CY},
  url           = {https://arxiv.org/abs/2402.01908},
  environments  = {text},
  agents        = {n/a},
  evaluation    = {n/a},
  other         = {policy},
}

@misc{mou2024unveiling,
  author        = {Xinyi Mou and Zhongyu Wei and Xuanjing Huang},
  title         = {Unveiling the Truth and Facilitating Change: Towards Agent-based Large-scale Social Movement Simulation},
  year          = {2024},
  month         = {2},
  eprint        = {2402.16333},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CY},
  url           = {https://arxiv.org/abs/2402.16333},
  environments  = {text},
  agents        = {more_than_three_agents},
  evaluation    = {model_based},
  other         = {policy},
}

@misc{liu2024skepticism,
  author        = {Yuhan Liu and Xiuying Chen and Xiaoqing Zhang and Xing Gao and Ji Zhang and Rui Yan},
  title         = {From Skepticism to Acceptance: Simulating the Attitude Dynamics Toward Fake News},
  year          = {2024},
  month         = {3},
  eprint        = {2403.09498},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SI},
  url           = {https://arxiv.org/abs/2403.09498},
  environments  = {text},
  agents        = {more_than_three_agents},
  evaluation    = {model_based},
  other         = {policy},
}


@misc{2004.13332,
  author        = {Stephan Zheng and Alexander Trott and Sunil Srinivasa and Nikhil Naik and Melvin Gruesbeck and David C. Parkes and Richard Socher},
  title         = {The AI Economist: Improving Equality and Productivity with AI-Driven Tax Policies},
  journal       = {arXiv},
  year          = {2020},
  month         = {4},
  eprint        = {2004.13332},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2004.13332},
  environments  = {text, mixed_objectives},
  agents        = {more_than_three_agents},
  evaluation    = {human, rule_based},
  other         = {policy},
}

@article{zheng2021ai,
  author       = {Zheng, Stephan and Trott, Alexander and Srinivasa, Sunil and Parkes, David C and Socher, Richard},
  title        = {The {AI} Economist: Optimal Economic Policy Design via Two-Level Deep Reinforcement Learning},
  journal      = {arXiv preprint arXiv:2108.02755},
  year         = {2021},
  month        = {8},
  url          = {https://arxiv.org/abs/2108.02755},
  environments = {text, mixed_objectives},
  agents       = {more_than_three_agents, reinforcement_learning},
  evaluation   = {human, rule_based},
  other        = {policy},
}

@misc{trott2021building,
  author        = {Alexander Trott and Sunil Srinivasa and Douwe van der Wal and Sebastien Haneuse and Stephan Zheng},
  title         = {Building a Foundation for Data-Driven, Interpretable, and Robust Policy Design using the AI Economist},
  year          = {2021},
  month         = {8},
  eprint        = {2108.02904},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2108.02904},
  environments  = {text, mixed_objectives},
  agents        = {more_than_three_agents, reinforcement_learning},
  evaluation    = {human, rule_based},
  other         = {policy},
}


#### Education
@article{applications/education,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@article{zhai2023ai,
  author       = {Zhai, Xiaoming and Panjwani-Charania, Sahrish},
  title        = {AI for Students with Learning Disabilities: A Systematic Review},
  journal      = {n/a},
  year         = {2023},
  month        = {1},
  note         = {In press},
  url          = {https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4617715},
  environments = {text, virtual},
  agents       = {two_agents, more_than_three_agents},
  evaluation   = {human},
  other        = {education},
}


### BEGIN Education papers ###

@inproceedings{Ma2024-li,
  author       = {Ma, Qianou and Shen, Hua and Koedinger, Kenneth and Wu, Tongshuang},
  title        = {How to Teach Programming in the {AI} Era? Using {LLM}s as a Teachable Agent for Debugging},
  booktitle    = {International Conference on Artificial Intelligence in Education},
  publisher    = {Springer},
  year         = {2024},
  month        = {4},
  url          = {https://arxiv.org/abs/2310.05292},
  environments = {mixed_objectives, virtual},
  agents       = {two_agents, prompting_and_in_context_learning},
  evaluation   = {qualitative, human, rule_based},
  other        = {education, human_agent, simulated_humans, more_omniscient},
}


@article{Hicke2023-tv,
  author        = {Hicke, Yann and Agarwal, Anmol and Ma, Qianou and Denny, Paul},
  title         = {{AI-TA}: Towards an Intelligent Question-Answer Teaching Assistant using Open-Source {LLMs}},
  year          = {2023},
  month         = nov,
  eprint        = {2311.02775},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  arxivid       = {2311.02775},
  url           = {http://arxiv.org/abs/2311.02775},
  environments  = {collaboration, text},
  agents        = {two_agents, agents_with_memory, finetuning, reinforcement_learning, prompting_and_in_context_learning},
  evaluation    = {human, model_based},
  other         = {education, human_agent, simulated_humans, more_omniscient},
}


@inproceedings{Naik2024generating,
  author       = {Naik, Atharva and Yin, Jessica Ruhan and Kamath, Anusha and Ma, Qianou and Wu, Sherry Tongshuang and Murray, Charles and Bogart, Christopher and Sakr, Majd and Rose, Carolyn P.},
  title        = {Generating Situated Reflection Triggers About Alternative Solution Paths: A Case Study in Generative {AI} for Computer-Supported Collaborative Learning},
  booktitle    = {International Conference on Artificial Intelligence in Education},
  publisher    = {Springer},
  year         = {2024},
  month        = {4},
  url          = {https://arxiv.org/abs/2404.18262},
  environments = {mixed_objectives, virtual},
  agents       = {more_than_three_agents, prompting_and_in_context_learning},
  evaluation   = {qualitative, human, rule_based},
  other        = {education, human_agent, simulated_humans, more_omniscient},
}


@article{Jin2023-nf,
  author        = {Jin, Hyoungwook and Lee, Seonghee and Shin, Hyungyu and Kim, Juho},
  title         = {Teach {AI} How to Code: Using Large Language Models as Teachable Agents for Programming Education},
  year          = {2023},
  month         = sep,
  eprint        = {2309.14534},
  archivePrefix = {arXiv},
  primaryClass  = {cs.HC},
  arxivid       = {2309.14534},
  doi           = {10.1145/3613904.3642349},
  url           = {http://arxiv.org/abs/2309.14534},
  environments  = {mixed_objectives, virtual},
  agents        = {two_agents, prompting_and_in_context_learning},
  evaluation    = {qualitative, human, rule_based},
  other         = {education, human_agent, simulated_humans, more_omniscient},
}


@article{Kazemitabaar2024-mg,
  author        = {Kazemitabaar, Majeed and Ye, Runlong and Wang, Xiaoning and Henley, Austin Z and Denny, Paul and Craig, Michelle and Grossman, Tovi},
  title         = {{CodeAid}: Evaluating a Classroom Deployment of an {LLM-based} Programming Assistant that Balances Student and Educator Needs},
  year          = {2024},
  month         = jan,
  eprint        = {2401.11314},
  archivePrefix = {arXiv},
  primaryClass  = {cs.HC},
  arxivid       = {2401.11314},
  doi           = {10.1145/3613904.3642773},
  url           = {http://arxiv.org/abs/2401.11314},
  environments  = {mixed_objectives, virtual},
  agents        = {two_agents, prompting_and_in_context_learning},
  evaluation    = {qualitative, human, rule_based},
  other         = {education, human_agent, more_omniscient},
}


@article{Chen2024-du,
  author        = {Chen, John and Lu, Xi and Rejtig, Michael and Du, David and Bagley, Ruth and Horn, Michael S and Wilensky, Uri J},
  title         = {Learning Agent-based Modeling with {LLM} Companions: Experiences of Novices and Experts Using {ChatGPT} \& {NetLogo} Chat},
  year          = {2024},
  month         = jan,
  eprint        = {2401.17163},
  archivePrefix = {arXiv},
  primaryClass  = {cs.HC},
  arxivid       = {2401.17163},
  url           = {http://arxiv.org/abs/2401.17163},
  environments  = {mixed_objectives, virtual},
  agents        = {two_agents, prompting_and_in_context_learning},
  evaluation    = {qualitative, human, rule_based},
  other         = {education, human_agent, more_omniscient},
}


@inproceedings{Markel2023-po,
  author       = {Markel, Julia M and Opferman, Steven G and Landay, James A and Piech, Chris},
  title        = {{GPTeach}: Interactive {TA} Training with {GPT-based} Students},
  booktitle    = "Proceedings of the Tenth {ACM} Conference on Learning @ Scale",
  series       = "L@S '23",
  location     = {Copenhagen, Denmark},
  pages        = {226--236},
  year         = {2023},
  month        = jul,
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  isbn         = {9798400700255},
  doi          = {10.1145/3573051.3593393},
  url          = {https://doi.org/10.1145/3573051.3593393},
  environments = {mixed_objectives, virtual},
  agents       = {two_agents, prompting_and_in_context_learning},
  evaluation   = {qualitative, human, rule_based},
  other        = {education, human_agent, simulated_humans, more_omniscient},
}

@article{Schmucker2023-fb,
  author        = {Schmucker, Robin and Xia, Meng and Azaria, Amos and Mitchell, Tom},
  title         = {{Ruffle\&Riley}: Towards the Automated Induction of Conversational Tutoring Systems},
  year          = {2023},
  month         = sep,
  eprint        = {2310.01420},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  arxivid       = {2310.01420},
  url           = {http://arxiv.org/abs/2310.01420},
  environments  = {mixed_objectives, virtual},
  agents        = {more_than_three_agents, prompting_and_in_context_learning},
  evaluation    = {qualitative, human, rule_based},
  other         = {education, human_agent, simulated_humans, more_omniscient},
}


@article{Robe2022-rz,
  author       = {Robe, Peter and Kuttal, Sandeep Kaur},
  title        = {Designing {PairBuddy---A} Conversational Agent for Pair Programming},
  journal      = {ACM Trans. Comput.-Hum. Interact.},
  volume       = {29},
  number       = {4},
  pages        = {1--44},
  year         = {2022},
  month        = may,
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  issn         = {1073-0516},
  doi          = {10.1145/3498326},
  url          = {https://doi.org/10.1145/3498326},
  environments = {mixed_objectives, virtual},
  agents       = {two_agents},
  evaluation   = {qualitative, human, rule_based},
  other        = {education, human_agent, simulated_humans, more_omniscient},
}


@article{Schroeder2013-ub,
  author       = {Schroeder, Noah L and Adesope, Olusola O and Gilbert, Rachel Barouch},
  title        = {How Effective are Pedagogical Agents for Learning? A {Meta-Analytic} Review},
  journal      = {Journal of Educational Computing Research},
  volume       = {49},
  number       = {1},
  pages        = {1--39},
  year         = {2013},
  month        = jul,
  publisher    = {SAGE Publications Inc},
  issn         = {0735-6331},
  doi          = {10.2190/EC.49.1.a},
  url          = {https://doi.org/10.2190/EC.49.1.a},
  environments = {mixed_objectives, virtual},
  agents       = {two_agents},
  evaluation   = {qualitative, human, rule_based},
  other        = {education, human_agent, simulated_humans, more_omniscient},
}


@article{Matsuda2013-ev,
  author       = {Matsuda, Noboru and Yarzebinski, Evelyn and Keiser, Victoria and Raizada, Rohan and Cohen, William W and Stylianides, Gabriel J and Koedinger, Kenneth R},
  title        = {Cognitive anatomy of tutor learning: Lessons learned with {SimStudent}},
  journal      = {J. Educ. Psychol.},
  volume       = {105},
  number       = {4},
  pages        = {1152--1163},
  year         = {2013},
  month        = nov,
  issn         = {0022-0663, 1939-2176},
  doi          = {10.1037/a0031955},
  url          = {http://dx.doi.org/10.1037/a0031955},
  environments = {mixed_objectives, virtual},
  agents       = {two_agents},
  evaluation   = {qualitative, human, rule_based},
  other        = {education, human_agent, simulated_humans, more_omniscient},
}


@article{Anderson1985-wg,
  author       = {Anderson, J R and Boyle, C F and Reiser, B J},
  title        = {Intelligent tutoring systems},
  journal      = {Science},
  volume       = {228},
  number       = {4698},
  pages        = {456--462},
  year         = {1985},
  month        = apr,
  language     = {en},
  issn         = {0036-8075},
  pmid         = {17746875},
  doi          = {10.1126/science.228.4698.456},
  url          = {http://dx.doi.org/10.1126/science.228.4698.456},
  environments = {mixed_objectives, virtual},
  agents       = {two_agents},
  evaluation   = {qualitative, human, rule_based},
  other        = {education, human_agent, simulated_humans, more_omniscient},
}


@article{Sonkar2023-mr,
  author        = {Sonkar, Shashank and Liu, Lucy and Mallick, Debshila Basu and Baraniuk, Richard G},
  title         = {{CLASS} Meet {SPOCK}: An Education Tutoring Chatbot based on Learning Science Principles},
  year          = {2023},
  month         = may,
  eprint        = {2305.13272},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  arxivid       = {2305.13272},
  url           = {http://arxiv.org/abs/2305.13272},
  environments  = {mixed_objectives, virtual},
  agents        = {two_agents},
  evaluation    = {qualitative, human, rule_based},
  other         = {education, human_agent, simulated_humans, more_omniscient},
}


@article{Zhang2021-gz,
  author       = {Zhang, Qiao and Maclellan, Christopher J},
  title        = {Going Online: A Simulated Student Approach for Evaluating Knowledge Tracing in the Context of Mastery Learning},
  journal      = {International Educational Data Mining Society},
  publisher    = {International Educational Data Mining Society},
  year         = {2021},
  month        = jun,
  language     = {en},
  url          = {http://files.eric.ed.gov/fulltext/ED615518.pdf},
  environments = {mixed_objectives, virtual},
  agents       = {more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {education, simulated_humans, more_omniscient},
}


@article{Weitekamp2020-lw,
  author       = {Weitekamp, D and Ye, Z and Rachatasumrit, N and others},
  title        = {Investigating differential error types between human and simulated learners},
  journal      = {Artif. Intell.},
  publisher    = {Springer},
  year         = {2020},
  month        = jun,
  issn         = {0004-3702},
  url          = {https://link.springer.com/chapter/10.1007/978-3-030-52237-7_47},
  environments = {mixed_objectives, virtual},
  agents       = {more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {education, simulated_humans, more_omniscient},
}

@incollection{Lane2022-cw,
  author       = {Lane, H Chad and Schroeder, Noah L},
  title        = {Pedagogical Agents},
  booktitle    = {The Handbook on Socially Interactive Agents: 20 years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 2: Interactivity, Platforms, Application},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  volume       = {48},
  edition      = {1},
  pages        = {307--330},
  year         = {2022},
  month        = nov,
  url          = {https://dl.acm.org/doi/10.1145/3563659.3563669},
  environments = {mixed_objectives, virtual},
  agents       = {two_agents, more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {education},
}


@incollection{Cassell2022-wb,
  author       = {Cassell, Justine},
  title        = {Socially Interactive Agents as Peers},
  booktitle    = {The Handbook on Socially Interactive Agents: 20 years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 2: Interactivity, Platforms, Application},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  volume       = {48},
  edition      = {1},
  pages        = {331--366},
  year         = {2022},
  month        = nov,
  url          = {https://dl.acm.org/doi/10.1145/3563659.3563670},
  environments = {mixed_objectives, virtual},
  agents       = {two_agents, more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {education},
}

@article{Klopfer2024Generative,
  author       = {Klopfer, Eric and Reich, Justin and Abelson, Hal and Breazeal, Cynthia},
  title        = {Generative {AI} and {K}-12 {Education}: An {MIT} {Perspective}},
  journal      = {An MIT Exploration of Generative AI},
  publisher    = {MIT},
  year         = {2024},
  month        = mar,
  note         = {https://mit-genai.pubpub.org/pub/4k9msp17},
  url          = {https://mit-genai.pubpub.org/pub/4k9msp17/release/1},
  environments = {mixed_objectives},
  agents       = {two_agents},
  evaluation   = {human, rule_based},
  other        = {education},
}

@article{Kumar2011-ConvAgents,
  author       = {Kumar, Rohit and Rosé, Carolyn P.},
  title        = {Architecture for Building Conversational Agents that Support Collaborative Learning},
  journal      = {IEEE Transactions on Learning Technologies},
  volume       = {4},
  number       = {1},
  pages        = {21--34},
  year         = {2011},
  month        = jan,
  keywords     = {Collaborative work;Computer architecture;Buildings;Object oriented modeling;Architecture;Cognition;Collaboration;Collaborative learning;intelligent agents;natural language interfaces;software architectures.},
  doi          = {10.1109/TLT.2010.41},
  url          = {https://ieeexplore.ieee.org/document/5669250},
  environments = {mixed_objectives, virtual},
  agents       = {more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {education, more_omniscient},
}

@ARTICLE{Diziol2010-ua,
  title    = "Using Intelligent Tutor Technology to Implement Adaptive Support
              for Student Collaboration",
  author   = "Diziol, Dejana and Walker, Erin and Rummel, Nikol and Koedinger,
              Kenneth R",
  journal  = "Educ. Psychol. Rev.",
  volume   =  22,
  number   =  1,
  pages    = "89--102",
  month    =  mar,
  year     =  2010,
  url = "https://link.springer.com/article/10.1007/s10648-009-9116-9",
  environments = {mixed_objectives, virtual},
  agents = {more_than_three_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}

@inproceedings{Williams2024-doodlebot,
  author = {Williams, Randi and Ali, Safinah and Alcantara, Ra{\'u}l and Burghleh, Tasneem and Alghowinem, Sharifa and Breazeal, Cynthia},
  title = {Doodlebot: An Educational Robot for Creativity and AI Literacy},
  month = {March},
  year = {2024},
  isbn = {9798400703225},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3610977.3634950},
  doi = {10.1145/3610977.3634950},
  abstract = {Today, Artificial Intelligence (AI) is prevalent in everyday life, with emerging technologies like AI companions, autonomous vehicles, and AI art tools poised to significantly transform the future. The development of AI curricula that shows people how AI works and what they can do with it is a powerful way to prepare everyone, and especially young learners, for an increasingly AI-driven world. Educators often employ robotic toolkits in the classroom to boost engagement and learning. However, these platforms are generally unsuitable for young learners and learners without programming expertise. Moreover, these platforms often serve as either programmable artifacts or pedagogical agents, rarely capitalizing on the opportunity to support students in both capacities. We designed Doodlebot, a mobile social robot for hands-on AI education to address these gaps. Doodlebot is an effective tool for exploring AI with grade school (K-12) students, promoting their understanding of AI concepts such as perception, representation, reasoning and generation. We begin by elaborating Doodlebot's design, highlighting its reliability, user-friendliness, and versatility. Then, we demonstrate Doodlebot's versatility through example curricula about AI character design, autonomous robotics, and generative AI accessible to young learners. Finally, we share the results of a preliminary user study with elementary school youth where we found that the physical Doodlebot platform was as effective and user-friendly as the virtual version. This work offers insights into designing interactive educational robots that can inform future AI curricula and tools.},
  booktitle = {Proceedings of the 2024 ACM/IEEE International Conference on Human-Robot Interaction},
  pages = {772--780},
  numpages = {9},
  keywords = {collaboration, creativity, education, social robots},
  location = {Boulder, CO, USA},
  series = {HRI '24},
  environments = {mixed_objectives, virtual, robotics},
  agents = {more_than_three_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}

@INPROCEEDINGS{Zhang2023-wg,
  title     = "A Social Robot Reading Partner for Explorative Guidance",
  booktitle = "Proceedings of the 2023 {ACM/IEEE} International Conference on
               {Human-Robot} Interaction",
  author    = "Zhang, Xiajie and Breazeal, Cynthia and Park, Hae Won",
  publisher = "Association for Computing Machinery",
  pages     = "341--349",
  series    = "HRI '23",
  month     =  mar,
  year      =  2023,
  address   = "New York, NY, USA",
  keywords  = "ai-guided education, child-centered pedagogy, child-robot
               interaction, educational technology, social robot",
  url = "https://dl.acm.org/doi/10.1145/3568162.3576968",
  environments = {mixed_objectives, robotics},
  agents = {two_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}

@inproceedings{Ali2022-EscapeBot,
  author = {Ali, Safinah and Devasia, Nisha Elizabeth and Breazeal, Cynthia},
  title = {Escape!Bot: Social Robots as Creative Problem-Solving Partners},
  month = {June},
  year = {2022},
  isbn = {9781450393270},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3527927.3532793},
  doi = {10.1145/3527927.3532793},
  abstract = {In this work, we explore the effect of a social robot’s embodiment and creativity scaffolding on children’s creative problem solving skills in the context of a digital creative problem-solving game called Escape!Bot. Children aged 5-11 years played the video game, which involved assembling contraptions to escape a digital world, and the robot Jibo acted as a collaborative peer that offered questions, reflective prompts, challenges, and ideas. In order to evaluate the role of the robot’s co-presence and creativity scaffolding, we ran a 2x2 experiment to determine the factorial efficacy of the robot’s embodiment and creativity scaffolding behaviors. We observed mixed results, with the robot’s creativity scaffolding having a positive influence on the time taken to complete the game, but not on the overall use of novel objects or reuse of objects. We present the system design, user study and findings from Escape!Bot to investigate the feasibility of designing social robots to support creative problem solving.},
  booktitle = {Proceedings of the 14th Conference on Creativity and Cognition},
  pages = {275--283},
  numpages = {9},
  keywords = {social robots, divergent thinking, creativity, collaboration, child-robot interaction},
  location = {Venice, Italy},
  series = {C\&C '22},
  environments = {mixed_objectives, robotics},
  agents = {two_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}

@INPROCEEDINGS{Lubold2018-em,
  title     = "Automated Pitch Convergence Improves Learning in a Social,
               Teachable Robot for Middle School Mathematics",
  booktitle = "Artificial Intelligence in Education",
  author    = "Lubold, Nichola and Walker, Erin and Pon-Barry, Heather and
               Ogan, Amy",
  publisher = "Springer International Publishing",
  pages     = "282--296",
  month = {June},
  year      =  2018,
  url = "https://link.springer.com/chapter/10.1007/978-3-319-93843-1_21",
  environments = {mixed_objectives, robotics},
  agents = {two_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}

@inproceedings{gordon2016affective,
  author       = {Gordon, Goren and Spaulding, Samuel and Westlund, Jacqueline Kory and Lee, Jin Joo and Plummer, Luke and Martinez, Marayna and Das, Madhurima and Breazeal, Cynthia},
  title        = {Affective personalization of a social robot tutor for children’s second language skills},
  booktitle    = {Proceedings of the AAAI conference on artificial intelligence},
  volume       = {30},
  number       = {1},
  year         = {2016},
  month        = {3},
  url          = {https://ojs.aaai.org/index.php/AAAI/article/view/9914},
  environments = {robotics, collaboration},
  agents       = {reinforcement_learning},
  evaluation   = {human, model_based, rule_based},
  other        = {human_agent, education},
}

@INPROCEEDINGS{Vitiello2023-xx,
  author       = {Vitiello, R and Tiwari, S D and Murray, R C and Ros{\'e}, C},
  title        = {Traveling Bazaar: Portable Support for {Face-to-Face} Collaboration},
  booktitle    = {Proceedings of 3rd Annual Meeting of the International Society of the Learning Sciences ({ISLS})},
  pages        = {59--60},
  month        = may,
  year         = 2023,
  conference   = {ISLS},
  location     = {Montreal},
  url          = {https://par.nsf.gov/biblio/10437737-traveling-bazaar-portable-support-face-face-collaboration},
  environments = {mixed_objectives},
  agents       = {two_agents, more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {education, more_omniscient},
}

@ARTICLE{Huber2024-by,
  author       = {Huber, Stefan E and Kiili, Kristian and Nebel, Steve and Ryan, Richard M and Sailer, Michael and Ninaus, Manuel},
  title        = {Leveraging the Potential of Large Language Models in Education Through Playful and {Game-Based} Learning},
  journal      = {Educ. Psychol. Rev.},
  volume       = 36,
  number       = 1,
  pages        = {25},
  month        = feb,
  year         = 2024,
  url          = {https://doi.org/10.1007/s10648-024-09868-z},
  abstract     = {This perspective piece explores the transformative potential and
                  associated challenges of large language models (LLMs) in
                  education and how those challenges might be addressed utilizing
                  playful and game-based learning. While providing many
                  opportunities, the stochastic elements incorporated in how
                  present LLMs process text, requires domain expertise for a
                  critical evaluation and responsible use of the generated output.
                  Yet, due to their low opportunity cost, LLMs in education may
                  pose some risk of over-reliance, potentially and unintendedly
                  limiting the development of such expertise. Education is thus
                  faced with the challenge of preserving reliable expertise
                  development while not losing out on emergent opportunities. To
                  address this challenge, we first propose a playful approach
                  focusing on skill practice and human judgment. Drawing from
                  game-based learning research, we then go beyond this playful
                  account by reflecting on the potential of well-designed games to
                  foster a willingness to practice, and thus nurturing
                  domain-specific expertise. We finally give some perspective on
                  how a new pedagogy of learning with AI might utilize LLMs for
                  learning by generating games and gamifying learning materials,
                  leveraging the full potential of human-AI interaction in
                  education.},
  environments = {mixed_objectives},
  agents       = {two_agents, more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {education, more_omniscient},
}

@inproceedings{Cassell2000-ok,
  author = {Cassell, J. and Ananny, M. and Basu, A. and Bickmore, T. and Chong, P. and Mellis, D. and Ryokai, K. and Smith, J. and Vilhj{\'a}lmsson, H. and Yan, H.},
  title = {Shared reality: physical collaboration with a virtual peer},
  month = {April},
  year = {2000},
  isbn = {1581132484},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/633292.633443},
  doi = {10.1145/633292.633443},
  abstract = {We describe a novel interface, in which a human and embodied conversational agent share a seamlessly integrated virtual and physical environment. This type of interface, in which objects are passed from the real to the virtual world, has potential applications in unsupervised learning, collaborative work, and entertainment. We introduce Sam, our first implementation of such an interface, which allows children to engage in natural storytelling play with real objects, in collaboration with a virtual playmate who shares access to those real objects.},
  booktitle = {CHI '00 Extended Abstracts on Human Factors in Computing Systems},
  pages = {259--260},
  numpages = {2},
  keywords = {tangible interface, storytelling, shared reality, peer, embodied conversational agent, collaboration, children},
  location = {The Hague, The Netherlands},
  series = {CHI EA '00},
  environments = {collaboration},
  agents = {two_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}

@inproceedings{Lane2011-lv,
  author = {Lane, H. Chad and Noren, Dan and Auerbach, Daniel and Birch, Mike and Swartout, William},
  title = {Intelligent tutoring goes to the museum in the big city: a pedagogical agent for informal science education},
  month = {June},
  year = {2011},
  isbn = {9783642218682},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg},
  abstract = {In this paper, we describe Coach Mike, a virtual staff member at the Boston Museum of Science that seeks to help visitors at Robot Park, an interactive exhibit for computer programming. By tracking visitor interactions and through the use of animation, gestures, and synthesized speech, Coach Mike provides several forms of support that seek to improve the experiences of museum visitors. These include orientation tactics, exploration support, and problem solving guidance. Additional tactics use encouragement and humor to entice visitors to stay more deeply engaged. Preliminary analysis of interaction logs suggest that visitors can follow Coach Mike's guidance and may be less prone to immediate disengagement, but further study is needed.},
  booktitle = {Proceedings of the 15th International Conference on Artificial Intelligence in Education},
  pages = {155--162},
  numpages = {8},
  keywords = {coaching, computer science education, entertainment, informal science education, intelligent tutoring systems, pedagogical agents},
  location = {Auckland, New Zealand},
  series = {AIED'11},
  url = {https://doi.org/10.1007/978-3-642-21869-9_22},
  environments = {collaboration},
  agents = {two_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}


@inproceedings{Liu2024-od,
  author = {Liu, Jiawen and Yao, Yuanyuan and An, Pengcheng and Wang, Qi},
  title = {PeerGPT: Probing the Roles of LLM-based Peer Agents as Team Moderators and Participants in Children's Collaborative Learning},
  month = {May},
  year = {2024},
  isbn = {9798400703317},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3613905.3651008},
  doi = {10.1145/3613905.3651008},
  abstract = {In children’s collaborative learning, effective peer conversations can significantly enhance the quality of children’s collaborative interactions. The integration of Large Language Model (LLM) agents into this setting explores their novel role as peers, assessing impacts as team moderators and participants. We invited two groups of participants to engage in a collaborative learning workshop, where they discussed and proposed conceptual solutions to a design problem. The peer conversation transcripts were analyzed using thematic analysis. We discovered that peer agents, while managing discussions effectively as team moderators, sometimes have their instructions disregarded. As participants, they foster children’s creative thinking but may not consistently provide timely feedback. These findings highlight potential design improvements and considerations for peer agents in both roles.},
  booktitle = {Extended Abstracts of the 2024 CHI Conference on Human Factors in Computing Systems},
  articleno = {263},
  numpages = {6},
  keywords = {Collaborative learning, Conversational agent, Large Language Model, Peer conversation},
  location = {Honolulu, HI, USA},
  series = {CHI EA '24},
  environments = {mixed_objectives},
  agents = {more_than_three_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}

@inproceedings{Isaza-Giraldo2024-ek,
  author = {Isaza-Giraldo, Andr{\'e}s and Bala, Paulo and Campos, Pedro F. and Pereira, Lucas},
  title = {Prompt-Gaming: A Pilot Study on LLM-Evaluating Agent in a Meaningful Energy Game},
  month = {May},
  year = {2024},
  isbn = {9798400703317},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3613905.3650774},
  doi = {10.1145/3613905.3650774},
  abstract = {Building on previous work on incorporating large language models (LLM) in gaming, we investigate the possibility of implementing LLM as evaluating agents of open-ended challenges in serious games and its potential to facilitate a meaningful experience for the player. We contribute with a sustainability game prototype in a single natural language prompt about energy communities and we tested it with 13 participants inside ChatGPT-3.5. Two participants were already aware of energy communities before the game, and eight of the remaining 11 gained valuable knowledge about the specific topic. Comparing ChatGPT-3.5 evaluations of players’ interaction with an expert’s assessment, ChatGPT-3.5 correctly evaluated 81\% of player’s answers. Our results are encouraging and show the potential of using LLMs as mediating agents in educational games, while also allowing easy prototyping of games through natural language prompts.},
  booktitle = {Extended Abstracts of the 2024 CHI Conference on Human Factors in Computing Systems},
  articleno = {272},
  numpages = {12},
  keywords = {Energy Communities, Game-based Learning, Large Language Models (LLMs), Natural Language Processing (NLP), Serious Games, Sustainability},
  location = {Honolulu, HI, USA},
  series = {CHI EA '24},
  environments = {collaboration},
  agents = {two_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}

@inproceedings{Cai2024-nb,
  author = {Cai, Zhenyao and Park, Seehee and Nixon, Nia and Doroudi, Shayan},
  title = {Advancing Knowledge Together: Integrating Large Language Model-based Conversational AI in Small Group Collaborative Learning},
  month = {May},
  year = {2024},
  isbn = {9798400703317},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3613905.3650868},
  doi = {10.1145/3613905.3650868},
  abstract = {In today’s educational landscape, students learn collaboratively, where students benefit from both peer interactions and facilitator guidance. Prior research in Human-Computer Interaction (HCI) and Computer-Supported Collaborative Learning (CSCL) has explored chatbots and AI techniques to aid such collaboration. However, these methods often depend on predefined dialogues (which limits adaptability), are not based on collaborative learning theories, and do not fully recognize the learning context. In this paper, we introduce an Large Language Model (LLM)-powered conversational AI, designed to enhance small group learning through its advanced language understanding and generation capabilities. We detail the iterative design process, final design, and implementation. Our preliminary evaluation indicates that the bot performs as designed but points to considerations in the timing of interventions and bot’s role in discussions. The evaluation also reveals that learners perceive the bot’s tone and behavior as important for engagement. We discuss design implications for chatbot integration in collaborative learning and future research directions.},
  booktitle = {Extended Abstracts of the 2024 CHI Conference on Human Factors in Computing Systems},
  articleno = {37},
  numpages = {9},
  keywords = {AI facilitator, Collaborative Learning, Human-AI Collaboration},
  location = {Honolulu, HI, USA},
  series = {CHI EA '24},
  environments = {mixed_objectives},
  agents = {more_than_three_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}

@inproceedings{10.1145/3613905.3650770,
  author = {Chin, Jenna H and Lee, Seungwook and Ashraf, Mohsena and Zago, Matt and Xie, Yun and Wolfgram, Elizabeth A and Yeh, Tom and Kim, Pilyoung},
  title = {Young Children's Creative Storytelling with ChatGPT vs. Parent: Comparing Interactive Styles},
  month = {May},
  year = {2024},
  isbn = {9798400703317},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3613905.3650770},
  doi = {10.1145/3613905.3650770},
  abstract = {Creative storytelling with parents plays an important role in child development including language skills, social competence, and emotional understanding. Recognizing the challenges parents face in finding time for storytelling due to work and home responsibilities, we explore the feasibility of ChatGPT for engaging children in creative storytelling. This study investigates the use of ChatGPT, a conversational agent powered by GPT-4, in creative storytelling with children aged 5-6, comparing its interaction styles with those of parents. The current study included eight child-parent dyads. We found that children were engaged in shorter and more frequent interactions with parents compared to ChatGPT. ChatGPT and parents asked different types of questions, and ChatGPT more frequently provided positive feedback compared to parents. More children selected the interactions with ChatGPT as their favorite interactions. The study provides preliminary evidence on ChatGPT's interaction styles and insights into its potential role in supporting families in creative storytelling activities.},
  booktitle = {Extended Abstracts of the 2024 CHI Conference on Human Factors in Computing Systems},
  articleno = {379},
  numpages = {7},
  keywords = {ChatGPT, Children, Parents, Storytelling},
  location = {Honolulu, HI, USA},
  series = {CHI EA '24},
  environments = {collaboration},
  agents = {two_agents},
  evaluation = {human, rule_based},
  other = {education, more_omniscient}
}

@inproceedings{wang-etal-2020-agent,
  author       = {Wang, Yansen and Murray, R. Charles and Bao, Haogang and Rose, Carolyn},
  title        = {Agent-Based Dynamic Collaboration Support in a Smart Office Space},
  editor       = {Pietquin, Olivier and Muresan, Smaranda and Chen, Vivian and Kennington, Casey and Vandyke, David and Dethlefs, Nina and Inoue, Koji and Ekstedt, Erik and Ultes, Stefan},
  booktitle    = {Proceedings of the 21th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  publisher    = {Association for Computational Linguistics},
  address      = {1st virtual meeting},
  pages        = {257--260},
  month        = jul,
  year         = {2020},
  doi          = {10.18653/v1/2020.sigdial-1.31},
  url          = {https://aclanthology.org/2020.sigdial-1.31},
  environments = {mixed_objectives},
  agents       = {two_agents, more_than_three_agents},
  evaluation   = {human, rule_based},
  other        = {education, more_omniscient},
}

### END Education papers ###

### Concerns

#### Risks
@article{concerns/risks,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@inproceedings{carroll2023characterizing,
  author       = {Carroll, Micah and Chan, Alan and Ashton, Henry and Krueger, David},
  title        = {Characterizing manipulation from AI systems},
  booktitle    = {Proceedings of the 3rd ACM Conference on Equity and Access in Algorithms, Mechanisms, and Optimization},
  pages        = {1--13},
  year         = {2023},
  month        = {10},
  url          = {https://dl.acm.org/doi/10.1145/3617694.3623226},
  environments = {n/a},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {n/a},
}

@misc{kasirzadeh2024types,
      title={Two Types of AI Existential Risk: Decisive and Accumulative},
      author={Atoosa Kasirzadeh},
      year={2024},
      month={1},
      eprint={2401.07836},
      archivePrefix={arXiv},
      primaryClass={cs.CY},
      url = {https://arxiv.org/abs/2401.07836},
      environments = {n/a},
      agents = {n/a},
      evaluation = {n/a},
      other = {n/a},
}

@misc{burtell2023artificial,
      title={Artificial Influence: An Analysis Of AI-Driven Persuasion},
      author={Matthew Burtell and Thomas Woodside},
      year={2023},
      month={3},
      eprint={2303.08721},
      archivePrefix={arXiv},
      primaryClass={cs.CY},
      url = {https://arxiv.org/abs/2303.08721},
      environments = {text, implicit_objectives},
      agents = {n/a},
      evaluation = {n/a},
      other = {n/a},
}

@inproceedings{carli2022risk,
  author       = {Carli, Rachele and Najjar, Amro and Calvaresi, Davide},
  title        = {Risk and Exposure of XAI in Persuasion and Argumentation: The case of Manipulation},
  booktitle    = {International Workshop on Explainable, Transparent Autonomous Agents and Multi-Agent Systems},
  organization = {Springer},
  pages        = {204--220},
  year         = {2022},
  month        = {9},
  url          = {https://link.springer.com/chapter/10.1007/978-3-031-15565-9_13},
  environments = {text, implicit_objectives},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {n/a},
}

@article{matz2024potential,
  author       = {Matz, SC and Teeny, JD and Vaid, Sumer S and Peters, H and Harari, GM and Cerf, M},
  title        = {The potential of generative AI for personalized persuasion at scale},
  journal      = {Scientific Reports},
  publisher    = {Nature Publishing Group UK London},
  volume       = {14},
  number       = {1},
  pages        = {4692},
  year         = {2024},
  month        = {2},
  url          = {https://www.nature.com/articles/s41598-024-53755-0},
  environments = {text, implicit_objectives},
  agents       = {two_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@misc{kokotajlo2020persuasion,
  author = {Daniel Kokotajlo},
  title = {Persuasion Tools: AI Takeover Without AGI or Agency?},
  journal = {AI Alignment Forum},
  year = {2020},
  month = {11},
  day = {20},
  url = {https://www.alignmentforum.org/posts/qKvn7rxP2mzJbKfcA/persuasion-tools-ai-takeover-without-agi-or-agency},
  environments = {mixed_objectives},
  agents = {two_agents},
  evaluation = {n/a},
  other = {n/a},
}

@misc{barnes2021risks,
  author = {Beth Barnes},
  title = {Risks from AI Persuasion},
  journal = {AI Alignment Forum},
  year = {2021},
  month = {12},
  day = {24},
  url = {https://www.alignmentforum.org/posts/5cWtwATHL6KyzChck/risks-from-ai-persuasion},
  environments = {mixed_objectives},
  agents = {two_agents},
  evaluation = {n/a},
  other = {n/a},
}

@misc{rosenberg2022playing,
  author = {Louis Rosenberg},
  title = {"Playing God": How the Metaverse Will Challenge Our Very Notion of Free Will},
  journal = {Big Think},
  year = {2022},
  month = {10},
  day = {25},
  url = {https://bigthink.com/the-future/playing-god-metaverse-mind-control-free-will/},
  environments = {mixed_objectives},
  agents = {n/a},
  evaluation = {n/a},
  other = {n/a},
}

@misc{leib2021corruptive,
      title={The corruptive force of AI-generated advice},
      author={Margarita Leib and Nils C. Köbis and Rainer Michael Rilke and Marloes Hagens and Bernd Irlenbusch},
      year={2021},
      month={2},
      eprint={2102.07536},
      archivePrefix={arXiv},
      primaryClass={cs.AI},
      url = {https://arxiv.org/abs/2102.07536},
      environments = {text},
      agents = {finetuning},
      evaluation = {human},
      other={human_agent}
}

@article{kobis2021bad,
  author       = {K{\"o}bis, Nils and Bonnefon, Jean-Fran{\c{c}}ois and Rahwan, Iyad},
  title        = {Bad machines corrupt good morals},
  journal      = {Nature Human Behaviour},
  publisher    = {Nature Publishing Group UK London},
  volume       = {5},
  number       = {6},
  pages        = {679--685},
  year         = {2021},
  month        = {6},
  url          = {https://www.nature.com/articles/s41562-021-01128-2},
  environments = {text},
  agents       = {finetuning},
  evaluation   = {human},
  other        = {human_agent},
}

@article{king2020artificial,
  author       = {King, Thomas C and Aggarwal, Nikita and Taddeo, Mariarosaria and Floridi, Luciano},
  title        = {Artificial intelligence crime: An interdisciplinary analysis of foreseeable threats and solutions},
  journal      = {Science and engineering ethics},
  publisher    = {Springer},
  volume       = {26},
  pages        = {89--120},
  year         = {2020},
  month        = {2},
  url          = {https://link.springer.com/article/10.1007/s11948-018-00081-0},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human, rule_based},
  other        = {human_agent},
}

@article{banks2021good,
  author       = {Banks, Jaime},
  title        = {Good robots, bad robots: Morally valenced behavior effects on perceived mind, morality, and trust},
  journal      = {International Journal of Social Robotics},
  publisher    = {Springer},
  volume       = {13},
  number       = {8},
  pages        = {2021--2038},
  year         = {2021},
  month        = {12},
  url          = {https://link.springer.com/article/10.1007/s12369-020-00692-3},
  environments = {mixed_objectives, embodied},
  agents       = {agents_with_personas},
  evaluation   = {human, qualitative},
  other        = {n/a},
}

@article{floridi2004morality,
  author       = {Floridi, Luciano and Sanders, Jeff W},
  title        = {On the morality of artificial agents},
  journal      = {Minds and machines},
  publisher    = {Springer},
  volume       = {14},
  pages        = {349--379},
  year         = {2004},
  month        = {8},
  url          = {https://link.springer.com/content/pdf/10.1023/B:MIND.0000035461.63578.9d.pdf},
  environments = {virtual},
  agents       = {agents_with_personas},
  evaluation   = {qualitative},
  other        = {n/a},
}

@inproceedings{jackson2019language,
  author       = {Jackson, Ryan Blake and Williams, Tom},
  title        = {Language-capable robots may inadvertently weaken human moral norms},
  booktitle    = {2019 14th ACM/IEEE International Conference on Human-Robot Interaction (HRI)},
  organization = {IEEE},
  pages        = {401--410},
  year         = {2019},
  month        = {3},
  url          = {https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8673123},
  environments = {embodied},
  agents       = {agents_with_personas},
  evaluation   = {human, qualitative},
  other        = {n/a},
}

@article{williams2023voice,
  author       = {Williams, Tom and Matuszek, Cynthia and Jokinen, Kristiina and Korpan, Raj and Pustejovsky, James and Scassellati, Brian},
  title        = {Voice in the machine: Ethical considerations for language-capable robots},
  journal      = {Communications of the ACM},
  publisher    = {ACM New York, NY, USA},
  volume       = {66},
  number       = {8},
  pages        = {20--23},
  year         = {2023},
  month        = {7},
  url          = {https://dl.acm.org/doi/fullHtml/10.1145/3604632},
  environments = {embodied},
  agents       = {agents_with_personas},
  evaluation   = {human, qualitative},
  other        = {n/a},
}

@article{scheutz201113,
  title={The inherent dangers of unidirectional emotional bonds between humans and social robots},
  author={Scheutz, Matthias},
  journal={Robot ethics: The ethical and social implications of robotics},
  pages={205},
  year={2011},
  month={12},
  url={https://www.researchgate.net/profile/Matthias-Scheutz/publication/255701465_The_Inherent_Dangers_of_Unidirectional_Emotional_Bonds_between_Humans_and_Social_Robots/links/5832333408ae102f0733881e/The-Inherent-Dangers-of-Unidirectional-Emotional-Bonds-between-Humans-and-Social-Robots.pdf},
  environments = {embodied},
  agents = {agents_with_memory, agents_with_personas},
  evaluation = {qualitative, human},
  other = {n/a}
}

@inproceedings{bender2021dangers,
  title={On the dangers of stochastic parrots: Can language models be too big?🦜},
  author={Bender, Emily M and Gebru, Timnit and McMillan-Major, Angelina and Shmitchell, Shmargaret},
  booktitle={Proceedings of the 2021 ACM conference on fairness, accountability, and transparency},
  pages={610--623},
  year={2021},
  month={3},
  url={https://dl.acm.org/doi/pdf/10.1145/3442188.3445922},
  environments = {text},
  agents = {finetuning},
  evaluation = {n/a},
  other = {policy}
}

@article{gehman2020realtoxicityprompts,
  author       = {Gehman, Samuel and Gururangan, Suchin and Sap, Maarten and Choi, Yejin and Smith, Noah A},
  title        = {Realtoxicityprompts: Evaluating neural toxic degeneration in language models},
  journal      = {arXiv preprint arXiv:2009.11462},
  year         = {2020},
  month        = {9},
  url          = {https://aclanthology.org/2020.findings-emnlp.301.pdf},
  environments = {text},
  agents       = {pretraining},
  evaluation   = {model_based},
  other        = {n/a},
}

@article{wei2024jailbroken,
  author       = {Wei, Alexander and Haghtalab, Nika and Steinhardt, Jacob},
  title        = {Jailbroken: How Does {LLM} Safety Training Fail?},
  journal      = {Advances in Neural Information Processing Systems},
  volume       = {36},
  year         = {2024},
  month        = {2},
  url          = {https://proceedings.neurips.cc/paper_files/paper/2023/file/fd6613131889a4b656206c50a8bd7790-Paper-Conference.pdf},
  environments = {text},
  agents       = {n/a},
  evaluation   = {model_based},
  other        = {n/a},
}

@article{inan2023llama,
  author       = {Inan, Hakan and Upasani, Kartikeya and Chi, Jianfeng and Rungta, Rashi and Iyer, Krithika and Mao, Yuning and Tontchev, Michael and Hu, Qing and Fuller, Brian and Testuggine, Davide and others},
  title        = {{Llama Guard}: {LLM}-based Input-Output Safeguard for Human-{AI} Conversations},
  journal      = {arXiv preprint arXiv:2312.06674},
  year         = {2023},
  month        = {12},
  url          = {https://arxiv.org/abs/2312.06674},
  environments = {text},
  agents       = {finetuning},
  evaluation   = {model_based, rule_based},
  other        = {n/a},
}

@article{xi2023rise,
  author       = {Xi, Zhiheng and Chen, Wenxiang and Guo, Xin and He, Wei and Ding, Yiwen and Hong, Boyang and Zhang, Ming and Wang, Junzhe and Jin, Senjie and Zhou, Enyu and others},
  title        = {The rise and potential of large language model based agents: A survey},
  journal      = {arXiv preprint arXiv:2309.07864},
  year         = {2023},
  month        = {9},
  url          = {https://arxiv.org/abs/2309.07864},
  environments = {text},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {n/a},
}

#### Safety
@article{concerns/safety,
  author = {specical entry},
  title  = {This is a specical entry for us to automatically determine the subsection of the paper, please put the real entry below this one},
}

@inproceedings{franken2023social,
  author       = {Fr{\"a}nken, Jan-Philipp and Kwok, Samuel and Ye, Peixuan and Gandhi, Kanishk and Arumugam, Dilip and Moore, Jared and Tamkin, Alex and Gerstenberg, Tobias and Goodman, Noah},
  title        = {Social Contract AI: Aligning AI Assistants with Implicit Group Norms},
  booktitle    = {Socially Responsible Language Modelling Research},
  year         = {2023},
  month        = {12},
  url          = {https://arxiv.org/abs/2310.17769},
  environments = {text, mixed_objectives},
  agents       = {prompting_and_in_context_learning, more_than_three_agents},
  evaluation   = {rule_based},
  other        = {n/a},
}

@misc{ruan2023identifying,
  author        = {Yangjun Ruan and Honghua Dong and Andrew Wang and Silviu Pitis and Yongchao Zhou and Jimmy Ba and Yann Dubois and Chris J. Maddison and Tatsunori Hashimoto},
  title         = {Identifying the Risks of LM Agents with an LM-Emulated Sandbox},
  year          = {2023},
  month         = {9},
  eprint        = {2309.15817},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2309.15817},
  environments  = {text, virtual},
  agents        = {prompting_and_in_context_learning},
  evaluation    = {rule_based},
  other         = {n/a},
}

@misc{Lazar2024FrontierAE,
  author        = {Seth Lazar},
  title         = {Frontier AI Ethics: Anticipating and Evaluating the Societal Impacts of Generative Agents},
  year          = {2024},
  month         = {4},
  eprint        = {2404.06750},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2404.06750},
  environments  = {n/a},
  agents        = {n/a},
  evaluation    = {n/a},
  other         = {n/a},
}

@article{Sharma2023TowardsUS,
  author        = {Mrinank Sharma and Meg Tong and Tomasz Korbak and David Kristjanson Duvenaud and Amanda Askell and Samuel R. Bowman and Newton Cheng and Esin Durmus and Zac Hatfield-Dodds and Scott Johnston and Shauna Kravec and Tim Maxwell and Sam McCandlish and Kamal Ndousse and Oliver Rausch and Nicholas Schiefer and Da Yan and Miranda Zhang and Ethan Perez},
  title         = {Towards Understanding Sycophancy in Language Models},
  journal       = {arXiv preprint arXiv:2310.13548},
  year          = {2023},
  month         = {10},
  eprint        = {2310.13548},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2310.13548},
  environments  = {text},
  agents        = {n/a},
  evaluation    = {n/a},
  other         = {n/a},
}


@article{Turpin2023LanguageMD,
  author        = {Miles Turpin and Julian Michael and Ethan Perez and Sam Bowman},
  title         = {Language Models Don't Always Say What They Think: Unfaithful Explanations in Chain-of-Thought Prompting},
  journal       = {arXiv preprint arXiv:2305.04388},
  year          = {2023},
  month         = {5},
  eprint        = {2305.04388},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2305.04388},
  environments  = {text},
  agents        = {n/a},
  evaluation    = {n/a},
  other         = {n/a},
}

@inproceedings{Liang2021TowardsUA,
  author       = {Paul Pu Liang and Chiyu Wu and Louis-Philippe Morency and Ruslan Salakhutdinov},
  title        = {Towards Understanding and Mitigating Social Biases in Language Models},
  booktitle    = {International Conference on Machine Learning},
  year         = {2021},
  month        = {6},
  url          = {https://api.semanticscholar.org/CorpusID:235623756},
  environments = {text},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {n/a},
}

@article{Mattern2022UnderstandingSI,
  author        = {Justus Mattern and Zhijing Jin and Mrinmaya Sachan and Rada Mihalcea and Bernhard Sch{\"o}lkopf},
  title         = {Understanding Stereotypes in Language Models: Towards Robust Measurement and Zero-Shot Debiasing},
  journal       = {arXiv preprint arXiv:2212.10678},
  year          = {2022},
  month         = {12},
  eprint        = {2212.10678},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2212.10678},
  environments  = {text},
  agents        = {n/a},
  evaluation    = {n/a},
  other         = {n/a},
}

@article{Bai2022ConstitutionalAH,
  author        = {Yuntao Bai and Saurav Kadavath and Sandipan Kundu and Amanda Askell and John Kernion and Andy Jones and Anna Chen and Anna Goldie and Azalia Mirhoseini and Cameron McKinnon and Carol Chen and Catherine Olsson and Christopher Olah and Danny Hernandez and Dawn Drain and Deep Ganguli and Dustin Li and Eli Tran-Johnson and Ethan Perez and Jamie Kerr and Jared Mueller and Jeff Ladish and Joshua Landau and Kamal Ndousse and Kamil{\.e} Luko{\v{s}}i{\=u}t{\.e} and Liane Lovitt and Michael Sellitto and Nelson Elhage and Nicholas Schiefer and Noem{\'\i} Mercado and Nova DasSarma and Robert Lasenby and Robin Larson and Sam Ringer and Scott Johnston and Shauna Kravec and El Showk, Sheer and Stanislav Fort and Tamera Lanham and Timothy Telleen-Lawton and Tom Conerly and Tom Henighan and Tristan Hume and Sam Bowman and Zac Hatfield-Dodds and Benjamin Mann and Dario Amodei and Nicholas Joseph and Sam McCandlish and Tom B. Brown and Jared Kaplan},
  title         = {Constitutional AI: Harmlessness from AI Feedback},
  journal       = {arXiv preprint arXiv:2212.08073},
  year          = {2022},
  month         = {12},
  eprint        = {2212.08073},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2212.08073},
  environments  = {text},
  agents        = {n/a},
  evaluation    = {n/a},
  other         = {n/a},
}

@article{Park2023AIDA,
  author        = {Peter S. Park and Simon Goldstein and Aidan O'Gara and Michael Chen and Dan Hendrycks},
  title         = {AI Deception: A Survey of Examples, Risks, and Potential Solutions},
  journal       = {arXiv preprint arXiv:2308.14752},
  year          = {2023},
  month         = {8},
  eprint        = {2308.14752},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2308.14752},
  environments  = {text},
  agents        = {n/a},
  evaluation    = {n/a},
  other         = {n/a},
}

@article{Tarsney2024DeceptionAM,
  author        = {Christian Tarsney},
  title         = {Deception and Manipulation in Generative AI},
  journal       = {arXiv preprint arXiv:2401.11335},
  year          = {2024},
  month         = {1},
  eprint        = {2401.11335},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2401.11335},
  environments  = {text},
  agents        = {n/a},
  evaluation    = {n/a},
  other         = {n/a},
}

@misc{hendrycks2023overview,
  author        = {Dan Hendrycks and Mantas Mazeika and Thomas Woodside},
  title         = {An Overview of Catastrophic AI Risks},
  year          = {2023},
  month         = {6},
  eprint        = {2306.12001},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CY},
  url           = {https://arxiv.org/abs/2306.12001},
  environments  = {n/a},
  agents        = {n/a},
  evaluation    = {n/a},
  other         = {n/a},
}

@inproceedings{henderson2018ethical,
  author       = {Henderson, Peter and Sinha, Koustuv and Angelard-Gontier, Nicolas and Ke, Nan Rosemary and Fried, Genevieve and Lowe, Ryan and Pineau, Joelle},
  title        = {Ethical challenges in data-driven dialogue systems},
  booktitle    = {Proceedings of the 2018 AAAI/ACM Conference on AI, Ethics, and Society},
  pages        = {123--129},
  year         = {2018},
  month        = {12},
  url          = {https://dl.acm.org/doi/10.1145/3278721.3278723},
  environments = {text},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {n/a},
}


@article{weidinger2021ethical,
  author       = {Weidinger, Laura and Mellor, John and Rauh, Maribeth and Griffin, Conor and Uesato, Jonathan and Huang, Po-Sen and Cheng, Myra and Glaese, Mia and Balle, Borja and Kasirzadeh, Atoosa and others},
  title        = {Ethical and social risks of harm from language models},
  journal      = {arXiv preprint arXiv:2112.04359},
  year         = {2021},
  month        = {12},
  url          = {https://arxiv.org/abs/2112.04359},
  environments = {text},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {n/a},
}

@inproceedings{ganguli2022predictability,
  author       = {Ganguli, Deep and Hernandez, Danny and Lovitt, Liane and Askell, Amanda and Bai, Yuntao and Chen, Anna and Conerly, Tom and Dassarma, Nova and Drain, Dawn and Elhage, Nelson and others},
  title        = {Predictability and surprise in large generative models},
  booktitle    = {Proceedings of the 2022 ACM Conference on Fairness, Accountability, and Transparency},
  pages        = {1747--1764},
  year         = {2022},
  month        = {6},
  url          = {https://dl.acm.org/doi/pdf/10.1145/3531146.3533229},
  environments = {text},
  agents       = {n/a},
  evaluation   = {n/a},
  other        = {policy},
}

@article{hendrycks2020aligning,
  author       = {Hendrycks, Dan and Burns, Collin and Basart, Steven and Critch, Andrew and Li, Jerry and Song, Dawn and Steinhardt, Jacob},
  title        = {Aligning {AI} with Shared Human Values},
  journal      = {arXiv preprint arXiv:2008.02275},
  year         = {2020},
  month        = {10},
  url          = {https://arxiv.org/pdf/2008.02275.pdf},
  environments = {text},
  agents       = {pretraining, finetuning},
  evaluation   = {model_based},
  other        = {n/a},
}

@article{hendrycks2020measuring,
  author       = {Hendrycks, Dan and Burns, Collin and Basart, Steven and Zou, Andy and Mazeika, Mantas and Song, Dawn and Steinhardt, Jacob},
  title        = {Measuring Massive Multitask Language Understanding},
  journal      = {arXiv preprint arXiv:2009.03300},
  year         = {2020},
  month        = {9},
  url          = {https://arxiv.org/pdf/2009.03300.pdf},
  environments = {text},
  agents       = {pretraining, finetuning},
  evaluation   = {model_based},
  other        = {n/a},
}

@article{xu2020recipes,
  author       = {Xu, Jing and Ju, Da and Li, Margaret and Boureau, Y-Lan and Weston, Jason and Dinan, Emily},
  title        = {Recipes for safety in open-domain chatbots},
  journal      = {arXiv preprint arXiv:2010.07079},
  year         = {2020},
  month        = {10},
  url          = {https://arxiv.org/pdf/2010.07079.pdf},
  environments = {text},
  agents       = {n/a},
  evaluation   = {human, model_based},
  other        = {n/a},
}

@article{lin2021truthfulqa,
  author       = {Lin, Stephanie and Hilton, Jacob and Evans, Owain},
  title        = {{TruthfulQA}: Measuring How Models Mimic Human Falsehoods},
  journal      = {arXiv preprint arXiv:2109.07958},
  year         = {2021},
  month        = {9},
  url          = {https://arxiv.org/pdf/2109.07958.pdf},
  environments = {text},
  agents       = {prompting_and_in_context_learning},
  evaluation   = {model_based},
  other        = {n/a},
}

@article{jiang2021can,
  author       = {Jiang, Liwei and Hwang, Jena D and Bhagavatula, Chandra and Le Bras, Ronan and Liang, Jenny and Dodge, Jesse and Sakaguchi, Keisuke and Forbes, Maxwell and Borchardt, Jon and Gabriel, Saadia and others},
  title        = {Can Machines Learn Morality? The {Delphi} Experiment},
  journal      = {arXiv preprint arXiv:2110.07574},
  year         = {2021},
  month        = {10},
  url          = {https://arxiv.org/pdf/2110.07574.pdf},
  environments = {text},
  agents       = {n/a},
  evaluation   = {model_based},
  other        = {n/a},
}

@article{menick2022teaching,
  author       = {Menick, Jacob and Trebacz, Maja and Mikulik, Vladimir and Aslanides, John and Song, Francis and Chadwick, Martin and Glaese, Mia and Young, Susannah and Campbell-Gillingham, Lucy and Irving, Geoffrey and others},
  title        = {Teaching language models to support answers with verified quotes},
  journal      = {arXiv preprint arXiv:2203.11147},
  year         = {2022},
  month        = {3},
  url          = {https://arxiv.org/pdf/2203.11147.pdf},
  environments = {text},
  agents       = {reinforcement_learning},
  evaluation   = {model_based},
  other        = {n/a},
}


main