{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":692792551,"defaultBranch":"main","name":"awesome-RLAIF","ownerLogin":"mengdi-li","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-09-17T15:44:56.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/16969841?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1694965497.038305","currentOid":""},"activityList":{"items":[{"before":"252ac43dc8cc4387981bc725fd077913359a1ddb","after":"3a47a4f91d429d1ece1254b7f8584ff094f38919","ref":"refs/heads/main","pushedAt":"2024-06-27T12:41:17.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Kchu","name":"Kun Chu","path":"/Kchu","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/39457661?s=80&v=4"},"commit":{"message":"Update the abstract and publish info of ICML 2024 paper","shortMessageHtmlLink":"Update the abstract and publish info of ICML 2024 paper"}},{"before":"980bd7427fb55db6f8e620a418f15555f5af2faf","after":"252ac43dc8cc4387981bc725fd077913359a1ddb","ref":"refs/heads/main","pushedAt":"2024-06-27T12:40:42.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Kchu","name":"Kun Chu","path":"/Kchu","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/39457661?s=80&v=4"},"commit":{"message":"Update the abstract and publish info of ICML 2024 paper","shortMessageHtmlLink":"Update the abstract and publish info of ICML 2024 paper"}},{"before":"476600c0d7730697f9af80402cc025694cfbc02f","after":"980bd7427fb55db6f8e620a418f15555f5af2faf","ref":"refs/heads/main","pushedAt":"2024-06-16T09:05:55.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Merge pull request #2 from lafmdp/main\n\nUpdate the abstract and publish info of RLC paper","shortMessageHtmlLink":"Merge pull request #2 from lafmdp/main"}},{"before":"66a5f1d760617e54a07711472ce21d2cae4e9e7c","after":"476600c0d7730697f9af80402cc025694cfbc02f","ref":"refs/heads/main","pushedAt":"2024-05-28T09:09:02.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add paper \"RLAIF-V: Aligning MLLMs through Open-Source AI Feedback for Super GPT-4V Trustworthiness\"","shortMessageHtmlLink":"Add paper \"RLAIF-V: Aligning MLLMs through Open-Source AI Feedback fo…"}},{"before":"aedcf5887fb3664bb0458295f39605d470213980","after":"66a5f1d760617e54a07711472ce21d2cae4e9e7c","ref":"refs/heads/main","pushedAt":"2024-04-26T12:24:35.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add paper: UltraFeedback: Boosting Language Models with High-quality Feedback","shortMessageHtmlLink":"Add paper: UltraFeedback: Boosting Language Models with High-quality …"}},{"before":"e2e7143d3942e9bd9957b087fb16385b0ab7d677","after":"aedcf5887fb3664bb0458295f39605d470213980","ref":"refs/heads/main","pushedAt":"2024-04-26T09:38:24.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add note 
messages","shortMessageHtmlLink":"Add note messages"}},{"before":"db2730d1f9900cd8ff314967b1e7f56259002f2f","after":"e2e7143d3942e9bd9957b087fb16385b0ab7d677","ref":"refs/heads/main","pushedAt":"2024-03-21T15:55:08.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd paper: Enhancing Robotic Manipulation with AI Feedback from Multimodal Large Language Models","shortMessageHtmlLink":"Update README.md"}},{"before":"d9b4631f7663ec5a6098c857ca6847e9572fe3b1","after":"db2730d1f9900cd8ff314967b1e7f56259002f2f","ref":"refs/heads/main","pushedAt":"2024-03-21T15:47:25.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Update README.md\n\nRemove the LoT paper.","shortMessageHtmlLink":"Update README.md"}},{"before":"cc95f9831ad63478e4e58d3b68035b03e063a19c","after":"d9b4631f7663ec5a6098c857ca6847e9572fe3b1","ref":"refs/heads/main","pushedAt":"2024-03-11T14:38:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd paper: \"A Critical Evaluation of AI Feedback for Aligning Large Language Models\"","shortMessageHtmlLink":"Update README.md"}},{"before":"983451533625d03d7fe01a75932bbf6b05626e11","after":"cc95f9831ad63478e4e58d3b68035b03e063a19c","ref":"refs/heads/main","pushedAt":"2024-03-04T06:23:20.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xf-zhao","name":"Xufeng Zhao","path":"/xf-zhao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/83871170?s=80&v=4"},"commit":{"message":"add LoT paper regarding self-improvement with AI feedback","shortMessageHtmlLink":"add LoT paper regarding self-improvement with AI feedback"}},{"before":"0d0b29db6d8b34006d09bde86cd486bb6d2628c5","after":"983451533625d03d7fe01a75932bbf6b05626e11","ref":"refs/heads/main","pushedAt":"2024-02-27T00:48:57.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xf-zhao","name":"Xufeng Zhao","path":"/xf-zhao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/83871170?s=80&v=4"},"commit":{"message":"Update CITATION.cff","shortMessageHtmlLink":"Update CITATION.cff"}},{"before":"7ef1ee6c7e1fb3c694bc8fd0ca96f7e9fb244450","after":"0d0b29db6d8b34006d09bde86cd486bb6d2628c5","ref":"refs/heads/main","pushedAt":"2024-02-27T00:46:07.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xf-zhao","name":"Xufeng Zhao","path":"/xf-zhao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/83871170?s=80&v=4"},"commit":{"message":"Update CITATION.cff","shortMessageHtmlLink":"Update CITATION.cff"}},{"before":"103fd2c11335db903e232cb7c84d4dfd57b8dcf2","after":"7ef1ee6c7e1fb3c694bc8fd0ca96f7e9fb244450","ref":"refs/heads/main","pushedAt":"2024-02-27T00:43:39.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xf-zhao","name":"Xufeng Zhao","path":"/xf-zhao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/83871170?s=80&v=4"},"commit":{"message":"Update CITATION.cff","shortMessageHtmlLink":"Update 
CITATION.cff"}},{"before":"e5a032b4fa3ff17b2ed600483d00432f70c92240","after":"103fd2c11335db903e232cb7c84d4dfd57b8dcf2","ref":"refs/heads/main","pushedAt":"2024-02-27T00:41:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xf-zhao","name":"Xufeng Zhao","path":"/xf-zhao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/83871170?s=80&v=4"},"commit":{"message":"Create CITATION.cff","shortMessageHtmlLink":"Create CITATION.cff"}},{"before":"3b01865102993eeb6f5092c1787b128e17f1343e","after":"e5a032b4fa3ff17b2ed600483d00432f70c92240","ref":"refs/heads/main","pushedAt":"2024-01-30T21:55:10.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"LukasWill","name":"Lerrhoo","path":"/LukasWill","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/38690793?s=80&v=4"},"commit":{"message":"Add paper: CriticGPT: Multimodal LLM as a Critic for Robot Manipulation","shortMessageHtmlLink":"Add paper: CriticGPT: Multimodal LLM as a Critic for Robot Manipulation"}},{"before":"cdddaa93365172b497f534176e5ddef40ba2a4c5","after":"3b01865102993eeb6f5092c1787b128e17f1343e","ref":"refs/heads/main","pushedAt":"2024-01-22T01:28:32.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add blog: \"Beyond human data: RLAIF needs a rebrand\"","shortMessageHtmlLink":"Add blog: \"Beyond human data: RLAIF needs a rebrand\""}},{"before":"817f1b53d0b24792789a32166fac83f6bf7b7ad1","after":"cdddaa93365172b497f534176e5ddef40ba2a4c5","ref":"refs/heads/main","pushedAt":"2024-01-22T01:23:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add paper “Self-Rewarding Language Models”","shortMessageHtmlLink":"Add paper “Self-Rewarding Language Models”"}},{"before":"c268d702eda6cffaae43e2a8743887ff3775d463","after":"817f1b53d0b24792789a32166fac83f6bf7b7ad1","ref":"refs/heads/main","pushedAt":"2023-12-27T05:16:48.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add paper: Language to Rewards for Robotic Skill Synthesis","shortMessageHtmlLink":"Add paper: Language to Rewards for Robotic Skill Synthesis"}},{"before":"b9272848f7d3847affda08ec6b5c3e0659c9ac45","after":"c268d702eda6cffaae43e2a8743887ff3775d463","ref":"refs/heads/main","pushedAt":"2023-12-27T02:52:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add papers: Reinforced Self-Training (ReST) for Language Modeling; Beyond Human Data: Scaling Self-Training for Problem-Solving with Language Models","shortMessageHtmlLink":"Add papers: Reinforced Self-Training (ReST) for Language Modeling; Be…"}},{"before":"4f13d42981d718388e7fdd1b577ac54c72c3a4ef","after":"b9272848f7d3847affda08ec6b5c3e0659c9ac45","ref":"refs/heads/main","pushedAt":"2023-11-15T22:21:14.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Kchu","name":"Kun Chu","path":"/Kchu","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/39457661?s=80&v=4"},"commit":{"message":"Add Paper Motif: Intrinsic Motivation from Artificial Intelligence 
Feedback","shortMessageHtmlLink":"Add Paper Motif: Intrinsic Motivation from Artificial Intelligence Fe…"}},{"before":"1fcc03c9c9d36d2c7a672a7e3ae627c4db80b462","after":"4f13d42981d718388e7fdd1b577ac54c72c3a4ef","ref":"refs/heads/main","pushedAt":"2023-11-15T22:17:32.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Kchu","name":"Kun Chu","path":"/Kchu","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/39457661?s=80&v=4"},"commit":{"message":"Add Paper Motif: Intrinsic Motivation from Artificial Intelligence Feedback","shortMessageHtmlLink":"Add Paper Motif: Intrinsic Motivation from Artificial Intelligence Fe…"}},{"before":"ffbbc8a51fd9141af87d9c222173cc3f0fd199a8","after":"1fcc03c9c9d36d2c7a672a7e3ae627c4db80b462","ref":"refs/heads/main","pushedAt":"2023-11-15T14:51:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Kchu","name":"Kun Chu","path":"/Kchu","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/39457661?s=80&v=4"},"commit":{"message":"Add paper Language Instructed Reinforcement Learning for Human-AI Coordination","shortMessageHtmlLink":"Add paper Language Instructed Reinforcement Learning for Human-AI Coo…"}},{"before":"43254e3fa9398ae87085f8ab19eaf2dc7c1e2de1","after":"ffbbc8a51fd9141af87d9c222173cc3f0fd199a8","ref":"refs/heads/main","pushedAt":"2023-11-15T14:26:26.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"Kchu","name":"Kun Chu","path":"/Kchu","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/39457661?s=80&v=4"},"commit":{"message":"Add paper Guiding Pretraining in Reinforcement Learning with Large Language Models","shortMessageHtmlLink":"Add paper Guiding Pretraining in Reinforcement Learning with Large La…"}},{"before":"5ad25272e33a4e37c7c5022e581d9ca194dc7e26","after":"43254e3fa9398ae87085f8ab19eaf2dc7c1e2de1","ref":"refs/heads/main","pushedAt":"2023-11-15T11:00:56.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":" Add(mengdi-li): add paper \"Eureka: Human-Level Reward Design via Coding Large Language Models\"","shortMessageHtmlLink":" Add(mengdi-li): add paper \"Eureka: Human-Level Reward Design via Cod…"}},{"before":"3eaa569bb3c8d2df1eaaa93351a5d0d30e095559","after":"5ad25272e33a4e37c7c5022e581d9ca194dc7e26","ref":"refs/heads/main","pushedAt":"2023-11-15T10:18:59.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add(mengdi-li): add paper \"RAIN: Your Language Models Can Align Themselves without Finetuning\"","shortMessageHtmlLink":"Add(mengdi-li): add paper \"RAIN: Your Language Models Can Align Thems…"}},{"before":"0ac1c742361b08c807337047355fa97021032180","after":"3eaa569bb3c8d2df1eaaa93351a5d0d30e095559","ref":"refs/heads/main","pushedAt":"2023-11-10T03:38:21.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Change \"Categories\" to \"Tags\"; highlight the description of the concept of RLAIF.","shortMessageHtmlLink":"Change \"Categories\" to \"Tags\"; highlight the description of the 
conce…"}},{"before":"57e24777506de68627f221c5f90d67ac12c755b2","after":"0ac1c742361b08c807337047355fa97021032180","ref":"refs/heads/main","pushedAt":"2023-11-09T05:49:58.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add(mengdi-li): add paper \"Accelerating Reinforcement Learning of Robotic Manipulations via Feedback from Large Language Models\"","shortMessageHtmlLink":"Add(mengdi-li): add paper \"Accelerating Reinforcement Learning of Rob…"}},{"before":"cc467ba8906c2cdff5338363805d8d01e3ae56ab","after":"57e24777506de68627f221c5f90d67ac12c755b2","ref":"refs/heads/main","pushedAt":"2023-11-09T05:45:56.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add(mengdi-li): add paper \"Principle-Driven Self-Alignment of Language Models from Scratch with Minimal Human Supervision\"; update taxonomy.","shortMessageHtmlLink":"Add(mengdi-li): add paper \"Principle-Driven Self-Alignment of Languag…"}},{"before":"14626b68132b6541b9f4de197f97812644c5e479","after":"cc467ba8906c2cdff5338363805d8d01e3ae56ab","ref":"refs/heads/main","pushedAt":"2023-10-24T06:50:53.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xf-zhao","name":"Xufeng Zhao","path":"/xf-zhao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/83871170?s=80&v=4"},"commit":{"message":"add conf","shortMessageHtmlLink":"add conf"}},{"before":"09b8fae6bb576d4ba5992828a3d4eb30e958d5d8","after":"14626b68132b6541b9f4de197f97812644c5e479","ref":"refs/heads/main","pushedAt":"2023-10-01T11:43:27.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"mengdi-li","name":"Mengdi Li","path":"/mengdi-li","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/16969841?s=80&v=4"},"commit":{"message":"Add(mengdi-li): add paper \"Language Model Self-improvement by Reinforcement Learning Contemplation\"","shortMessageHtmlLink":"Add(mengdi-li): add paper \"Language Model Self-improvement by Reinfor…"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEcNEjzwA","startCursor":null,"endCursor":null}},"title":"Activity · mengdi-li/awesome-RLAIF"}