From 930c745ff63cba1811ebb33355d8654df75cd4bc Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sun, 30 Jul 2023 22:29:54 +0000 Subject: [PATCH 01/11] add first draft --- joss/paper.bib | 59 +++++++++++++++++++++++++++++++++++++++++ joss/paper.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 joss/paper.bib create mode 100644 joss/paper.md diff --git a/joss/paper.bib b/joss/paper.bib new file mode 100644 index 000000000..72e397796 --- /dev/null +++ b/joss/paper.bib @@ -0,0 +1,59 @@ +@article{Pearson:2017, + url = {http://adsabs.harvard.edu/abs/2017arXiv170304627P}, + Archiveprefix = {arXiv}, + Author = {{Pearson}, S. and {Price-Whelan}, A.~M. and {Johnston}, K.~V.}, + Eprint = {1703.04627}, + Journal = {ArXiv e-prints}, + Keywords = {Astrophysics - Astrophysics of Galaxies}, + Month = mar, + Title = {{Gaps in Globular Cluster Streams: Pal 5 and the Galactic Bar}}, + Year = 2017 +} + +@book{Binney:2008, + url = {http://adsabs.harvard.edu/abs/2008gady.book.....B}, + Author = {{Binney}, J. 
and {Tremaine}, S.}, + Booktitle = {Galactic Dynamics: Second Edition, by James Binney and Scott Tremaine.~ISBN 978-0-691-13026-2 (HB).~Published by Princeton University Press, Princeton, NJ USA, 2008.}, + Publisher = {Princeton University Press}, + Title = {{Galactic Dynamics: Second Edition}}, + Year = 2008 +} + +@article{gaia, + author = {{Gaia Collaboration}}, + title = "{The Gaia mission}", + journal = {Astronomy and Astrophysics}, + archivePrefix = "arXiv", + eprint = {1609.04153}, + primaryClass = "astro-ph.IM", + keywords = {space vehicles: instruments, Galaxy: structure, astrometry, parallaxes, proper motions, telescopes}, + year = 2016, + month = nov, + volume = 595, + doi = {10.1051/0004-6361/201629272}, + url = {http://adsabs.harvard.edu/abs/2016A%26A...595A...1G}, +} + +@article{astropy, + author = {{Astropy Collaboration}}, + title = "{Astropy: A community Python package for astronomy}", + journal = {Astronomy and Astrophysics}, + archivePrefix = "arXiv", + eprint = {1307.6212}, + primaryClass = "astro-ph.IM", + keywords = {methods: data analysis, methods: miscellaneous, virtual observatory tools}, + year = 2013, + month = oct, + volume = 558, + doi = {10.1051/0004-6361/201322068}, + url = {http://adsabs.harvard.edu/abs/2013A%26A...558A..33A} +} + +@misc{fidgit, + author = {A. M. Smith and K. Thaney and M. 
Hahnel}, + title = {Fidgit: An ungodly union of GitHub and Figshare}, + year = {2020}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/arfon/fidgit} +} \ No newline at end of file diff --git a/joss/paper.md b/joss/paper.md new file mode 100644 index 000000000..8542b2ca0 --- /dev/null +++ b/joss/paper.md @@ -0,0 +1,72 @@ +--- +title: 'elk: A Python package to elicit latent knowledge from LLMs' +tags: + - python + - machine leaarning + - interpretability + - ai alignment + - honest AI +authors: + - name: Nora Belrose + affiliation: 1 + - name: Walter Laurito + corresponding: true # (This is how to denote the corresponding author) + affiliation: 2 + - name: Alex Mallen + affiliation: 1 + - name: Author with no affiliation + affiliation: 3 +affiliations: + - name: EleutherAI + index: 1 + - name: FZI Research Center for Information Technology, Germany + index: 2 + - name: EleutherAI + index: 3 +date: 13 August 2017 +bibliography: paper.bib + +# Optional fields if submitting to a AAS journal too, see this blog post: +# https://blog.joss.theoj.org/2018/12/a-new-collaboration-with-aas-publishing +aas-doi: 10.3847/xxxxx <- update this with the DOI from AAS once you know it. +aas-journal: Astrophysical Journal <- The name of the AAS journal. +--- + +# Summary + +`elk` is a library designed to elicit latent knowledge ([ELK](`https://docs.google.com/document/d/1WwsnJQstPq91_Yh-Ch2XRL8H_EpsnjrC1dwZXR37PC8/edit`) [@author:elk]) from language models. It includes implementations of both the original and an enhanced version of the CSS method, as well as an approach based on the CRC method. Designed for researchers, `elk` offers features such as multi-GPU support, integration with Huggingface, and continuous improvement by a dedicated team. The Eleuther AI Discord's `elk` channel provides a platform for collaboration and discussion related to the library and associated research. 
+ +# Statement of need + +Language models are proficient at predicting successive tokens in a sequence of text. However, they often inadvertently mirror human errors and misconceptions, even when equipped with the capability to "know better." This behavior becomes particularly concerning when models are trained to generate text that is highly rated by human evaluators, leading to the potential output of erroneous statements that may go undetected. Our solution is to directly Elicit Latent Knowledge (ELK) from within the activations of a language model to mitigate this challenge. + +`elk` is a specialized library developed to provide both the original and an enhanced version of the CSS methodology. Described in the paper "Discovering Latent Knowledge in Language Models Without Supervision" by Burns et al. [@author:burns], the CSS method has been instrumental in our understanding of language models. In addition, we have implemented an approach based on the Contrastive Representation Clustering (CRC) method (2022) from the same paper. The CRC technique allows for the discovery of features in the hidden states of a language model that adhere to specific logical consistency requirements. Interestingly, these features have proven to be highly effective for question-answering and text classification tasks, even when trained without labels. + +Designed with the research community in mind, elk serves as a powerful tool for those seeking to investigate the veracity of model output and explore the underlying beliefs embedded within the model. The library offers: + +Multi-GPU Support: Efficient extraction, training, and evaluation through parallel processing. +Integration with Huggingface: Easy utilization of models and datasets from a popular source. +Active Development and Support: Continuous improvement by a dedicated team of researchers and engineers. 
+ +For collaboration, discussion, and support, the [Eleuther AI Discord's elk channel](https://discord.com/channels/729741769192767510/1070194752785489991) provides a platform for engaging with others interested in the library or related research projects. + + +# Citations + +Citations to entries in paper.bib should be in +[rMarkdown](http://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html) +format. + +If you want to cite a software repository URL (e.g. something on GitHub without a preferred +citation) then you can do it with the example BibTeX entry below for @fidgit. + +For a quick reference, the following citation commands can be used: +- `@author:2001` -> "Author et al. (2001)" +- `[@author:2001]` -> "(Author et al., 2001)" +- `[@author1:2001; @author2:2001]` -> "(Author1 et al., 2001; Author2 et al., 2002)" + + +# Acknowledgements + + +# References \ No newline at end of file From 5553fd38c947e906c069a55339154c59fd97aab7 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 31 Jul 2023 13:32:14 +0000 Subject: [PATCH 02/11] update draft --- joss/paper.bib | 62 ++++---------------------------------------------- joss/paper.md | 36 +++++++---------------------- 2 files changed, 13 insertions(+), 85 deletions(-) diff --git a/joss/paper.bib b/joss/paper.bib index 72e397796..93b035ff8 100644 --- a/joss/paper.bib +++ b/joss/paper.bib @@ -1,59 +1,7 @@ -@article{Pearson:2017, - url = {http://adsabs.harvard.edu/abs/2017arXiv170304627P}, - Archiveprefix = {arXiv}, - Author = {{Pearson}, S. and {Price-Whelan}, A.~M. 
and {Johnston}, K.~V.}, - Eprint = {1703.04627}, - Journal = {ArXiv e-prints}, - Keywords = {Astrophysics - Astrophysics of Galaxies}, - Month = mar, - Title = {{Gaps in Globular Cluster Streams: Pal 5 and the Galactic Bar}}, - Year = 2017 +@article{burns, + title={Discovering latent knowledge in language models without supervision}, + author={Burns, Collin and Ye, Haotian and Klein, Dan and Steinhardt, Jacob}, + journal={arXiv preprint arXiv:2212.03827}, + year={2022} } -@book{Binney:2008, - url = {http://adsabs.harvard.edu/abs/2008gady.book.....B}, - Author = {{Binney}, J. and {Tremaine}, S.}, - Booktitle = {Galactic Dynamics: Second Edition, by James Binney and Scott Tremaine.~ISBN 978-0-691-13026-2 (HB).~Published by Princeton University Press, Princeton, NJ USA, 2008.}, - Publisher = {Princeton University Press}, - Title = {{Galactic Dynamics: Second Edition}}, - Year = 2008 -} - -@article{gaia, - author = {{Gaia Collaboration}}, - title = "{The Gaia mission}", - journal = {Astronomy and Astrophysics}, - archivePrefix = "arXiv", - eprint = {1609.04153}, - primaryClass = "astro-ph.IM", - keywords = {space vehicles: instruments, Galaxy: structure, astrometry, parallaxes, proper motions, telescopes}, - year = 2016, - month = nov, - volume = 595, - doi = {10.1051/0004-6361/201629272}, - url = {http://adsabs.harvard.edu/abs/2016A%26A...595A...1G}, -} - -@article{astropy, - author = {{Astropy Collaboration}}, - title = "{Astropy: A community Python package for astronomy}", - journal = {Astronomy and Astrophysics}, - archivePrefix = "arXiv", - eprint = {1307.6212}, - primaryClass = "astro-ph.IM", - keywords = {methods: data analysis, methods: miscellaneous, virtual observatory tools}, - year = 2013, - month = oct, - volume = 558, - doi = {10.1051/0004-6361/201322068}, - url = {http://adsabs.harvard.edu/abs/2013A%26A...558A..33A} -} - -@misc{fidgit, - author = {A. M. Smith and K. Thaney and M. 
Hahnel}, - title = {Fidgit: An ungodly union of GitHub and Figshare}, - year = {2020}, - publisher = {GitHub}, - journal = {GitHub repository}, - url = {https://github.com/arfon/fidgit} -} \ No newline at end of file diff --git a/joss/paper.md b/joss/paper.md index 8542b2ca0..9cd13a5a2 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -26,47 +26,27 @@ affiliations: date: 13 August 2017 bibliography: paper.bib -# Optional fields if submitting to a AAS journal too, see this blog post: -# https://blog.joss.theoj.org/2018/12/a-new-collaboration-with-aas-publishing -aas-doi: 10.3847/xxxxx <- update this with the DOI from AAS once you know it. -aas-journal: Astrophysical Journal <- The name of the AAS journal. --- # Summary -`elk` is a library designed to elicit latent knowledge ([ELK](`https://docs.google.com/document/d/1WwsnJQstPq91_Yh-Ch2XRL8H_EpsnjrC1dwZXR37PC8/edit`) [@author:elk]) from language models. It includes implementations of both the original and an enhanced version of the CSS method, as well as an approach based on the CRC method. Designed for researchers, `elk` offers features such as multi-GPU support, integration with Huggingface, and continuous improvement by a dedicated team. The Eleuther AI Discord's `elk` channel provides a platform for collaboration and discussion related to the library and associated research. +`elk` is a library designed to elicit latent knowledge ([elk](`https://docs.google.com/document/d/1WwsnJQstPq91_Yh-Ch2XRL8H_EpsnjrC1dwZXR37PC8/`) [@author:elk]) from language models. It includes implementations of both the original and an enhanced version of the CSS method, as well as an approach based on the CRC method. Designed for researchers, `elk` offers features such as multi-GPU support, integration with Huggingface, and continuous improvement by a dedicated team. The Eleuther AI Discord's `elk` channel provides a platform for collaboration and discussion related to the library and associated research. 
# Statement of need -Language models are proficient at predicting successive tokens in a sequence of text. However, they often inadvertently mirror human errors and misconceptions, even when equipped with the capability to "know better." This behavior becomes particularly concerning when models are trained to generate text that is highly rated by human evaluators, leading to the potential output of erroneous statements that may go undetected. Our solution is to directly Elicit Latent Knowledge (ELK) from within the activations of a language model to mitigate this challenge. +Language models are proficient at predicting successive tokens in a sequence of text. However, they often inadvertently mirror human errors and misconceptions, even when equipped with the capability to "know better." This behavior becomes particularly concerning when models are trained to generate text that is highly rated by human evaluators, leading to the potential output of erroneous statements that may go undetected. Our solution is to directly elicit latent knowledge (([elk](`https://docs.google.com/document/d/1WwsnJQstPq91_Yh-Ch2XRL8H_EpsnjrC1dwZXR37PC8/edit`) [@author:elk]) from within the activations of a language model to mitigate this challenge. -`elk` is a specialized library developed to provide both the original and an enhanced version of the CSS methodology. Described in the paper "Discovering Latent Knowledge in Language Models Without Supervision" by Burns et al. [@author:burns], the CSS method has been instrumental in our understanding of language models. In addition, we have implemented an approach based on the Contrastive Representation Clustering (CRC) method (2022) from the same paper. The CRC technique allows for the discovery of features in the hidden states of a language model that adhere to specific logical consistency requirements. 
Interestingly, these features have proven to be highly effective for question-answering and text classification tasks, even when trained without labels. +`elk` is a specialized library developed to provide both the original and an enhanced version of the CSS methodology. Described in the paper "Discovering Latent Knowledge in Language Models Without Supervision" by Burns et al. [@author:burns]. In addition, we have implemented an approach based on the Contrastive Representation Clustering (CRC) method (2022) from the same paper. -Designed with the research community in mind, elk serves as a powerful tool for those seeking to investigate the veracity of model output and explore the underlying beliefs embedded within the model. The library offers: +`elk` serves as a tool for those seeking to investigate the veracity of model output and explore the underlying beliefs embedded within the model. The library offers: -Multi-GPU Support: Efficient extraction, training, and evaluation through parallel processing. -Integration with Huggingface: Easy utilization of models and datasets from a popular source. -Active Development and Support: Continuous improvement by a dedicated team of researchers and engineers. +- Multi-GPU Support: Efficient extraction, training, and evaluation through parallel processing. +- Integration with Huggingface: Easy utilization of models and datasets from a popular source. +- Active Development and Support: Continuous improvement by a dedicated team of researchers and engineers. For collaboration, discussion, and support, the [Eleuther AI Discord's elk channel](https://discord.com/channels/729741769192767510/1070194752785489991) provides a platform for engaging with others interested in the library or related research projects. - -# Citations - -Citations to entries in paper.bib should be in -[rMarkdown](http://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html) -format. - -If you want to cite a software repository URL (e.g. 
something on GitHub without a preferred -citation) then you can do it with the example BibTeX entry below for @fidgit. - -For a quick reference, the following citation commands can be used: -- `@author:2001` -> "Author et al. (2001)" -- `[@author:2001]` -> "(Author et al., 2001)" -- `[@author1:2001; @author2:2001]` -> "(Author1 et al., 2001; Author2 et al., 2002)" - - # Acknowledgements - +We want to thank [SERI MATS](https://www.serimats.org/) and [EleutherAI](https://www.eleuther.ai/) for supporting this work. # References \ No newline at end of file From e994b437f2d6a10f41683fd41718a6751f5c748f Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 31 Jul 2023 16:50:22 +0000 Subject: [PATCH 03/11] cleanup aknowledgements --- joss/paper.md | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/joss/paper.md b/joss/paper.md index 9cd13a5a2..420a60f46 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -10,27 +10,37 @@ authors: - name: Nora Belrose affiliation: 1 - name: Walter Laurito - corresponding: true # (This is how to denote the corresponding author) + corresponding: true affiliation: 2 - name: Alex Mallen affiliation: 1 - - name: Author with no affiliation + - name: Fabien Roger affiliation: 3 + - name: Kay Kozaronek + affiliation: 4 + - name: Christy Koh + affiliation: 5 + - name: Jonathan NG + affiliation: 6 + - name: Reagan Lee + affiliation: 7 + - name: Alex Wan + affiliation: 8 affiliations: - name: EleutherAI index: 1 - name: FZI Research Center for Information Technology, Germany index: 2 - - name: EleutherAI + - name: Redwood Research index: 3 -date: 13 August 2017 +date: 31 July 2023 bibliography: paper.bib --- # Summary -`elk` is a library designed to elicit latent knowledge ([elk](`https://docs.google.com/document/d/1WwsnJQstPq91_Yh-Ch2XRL8H_EpsnjrC1dwZXR37PC8/`) [@author:elk]) from language models. 
It includes implementations of both the original and an enhanced version of the CSS method, as well as an approach based on the CRC method. Designed for researchers, `elk` offers features such as multi-GPU support, integration with Huggingface, and continuous improvement by a dedicated team. The Eleuther AI Discord's `elk` channel provides a platform for collaboration and discussion related to the library and associated research. +`elk` is a library designed to elicit latent knowledge ([elk](https://docs.google.com/document/d/1WwsnJQstPq91_Yh-Ch2XRL8H_EpsnjrC1dwZXR37PC8/) [@elk]) from language models. It includes implementations of both the original and an enhanced version of the CCS method, as well as an approach based on the CRC method [@burns]. Designed for researchers, `elk` offers features such as multi-GPU support, integration with Huggingface, and continuous improvement by a dedicated group of people. The Eleuther AI Discord's `elk` channel provides a platform for collaboration and discussion related to the library and associated research. # Statement of need @@ -47,6 +57,4 @@ Language models are proficient at predicting successive tokens in a sequence of For collaboration, discussion, and support, the [Eleuther AI Discord's elk channel](https://discord.com/channels/729741769192767510/1070194752785489991) provides a platform for engaging with others interested in the library or related research projects. # Acknowledgements -We want to thank [SERI MATS](https://www.serimats.org/) and [EleutherAI](https://www.eleuther.ai/) for supporting this work. - -# References \ No newline at end of file +We would like to thank [SERI MATS](https://www.serimats.org/) and [EleutherAI](https://www.eleuther.ai/) for supporting our work. 
\ No newline at end of file From e97ceb965620dda711ffc76fe9fbbcc0820b4791 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 31 Jul 2023 17:59:34 +0100 Subject: [PATCH 04/11] Update paper.md --- joss/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joss/paper.md b/joss/paper.md index 420a60f46..5f63b3bfd 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -57,4 +57,4 @@ Language models are proficient at predicting successive tokens in a sequence of For collaboration, discussion, and support, the [Eleuther AI Discord's elk channel](https://discord.com/channels/729741769192767510/1070194752785489991) provides a platform for engaging with others interested in the library or related research projects. # Acknowledgements -We would like to thank [SERI MATS](https://www.serimats.org/) and [EleutherAI](https://www.eleuther.ai/) for supporting our work. \ No newline at end of file +We would like to thank [EleutherAI](https://www.eleuther.ai/) and [SERI MATS](https://www.serimats.org/) for supporting our work. From a4531266551758234dc5790ecbe7f43500c44cf6 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 31 Jul 2023 18:00:51 +0100 Subject: [PATCH 05/11] Update paper.md --- joss/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joss/paper.md b/joss/paper.md index 5f63b3bfd..27cfb578b 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -46,7 +46,7 @@ bibliography: paper.bib Language models are proficient at predicting successive tokens in a sequence of text. However, they often inadvertently mirror human errors and misconceptions, even when equipped with the capability to "know better." This behavior becomes particularly concerning when models are trained to generate text that is highly rated by human evaluators, leading to the potential output of erroneous statements that may go undetected. 
Our solution is to directly elicit latent knowledge (([elk](`https://docs.google.com/document/d/1WwsnJQstPq91_Yh-Ch2XRL8H_EpsnjrC1dwZXR37PC8/edit`) [@author:elk]) from within the activations of a language model to mitigate this challenge. -`elk` is a specialized library developed to provide both the original and an enhanced version of the CSS methodology. Described in the paper "Discovering Latent Knowledge in Language Models Without Supervision" by Burns et al. [@author:burns]. In addition, we have implemented an approach based on the Contrastive Representation Clustering (CRC) method (2022) from the same paper. +`elk` is a specialized library developed to provide both the original and an enhanced version of the CCS methodology described in the paper "Discovering Latent Knowledge in Language Models Without Supervision" by Burns et al. [@burns]. In addition, we have implemented an approach, called VINC, based on the Contrastive Representation Clustering (CRC) method from the same paper. `elk` serves as a tool for those seeking to investigate the veracity of model output and explore the underlying beliefs embedded within the model. 
The library offers: From ae1ca60afb8002abfaefc9d6a39c2b7b76d12ee2 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Fri, 11 Aug 2023 11:52:29 +0000 Subject: [PATCH 06/11] add all affiliations and contributors --- comparison-sweeps | 1 + joss/paper.md | 38 +++++++++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 7 deletions(-) create mode 160000 comparison-sweeps diff --git a/comparison-sweeps b/comparison-sweeps new file mode 160000 index 000000000..f4ed884b5 --- /dev/null +++ b/comparison-sweeps @@ -0,0 +1 @@ +Subproject commit f4ed884b59c99012c80b972d2a02c660b39c90cb diff --git a/joss/paper.md b/joss/paper.md index 5f63b3bfd..1ed3b716c 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -11,28 +11,52 @@ authors: affiliation: 1 - name: Walter Laurito corresponding: true - affiliation: 2 + affiliation: [2, 5] - name: Alex Mallen - affiliation: 1 + affiliation: [1, 7] - name: Fabien Roger affiliation: 3 - name: Kay Kozaronek - affiliation: 4 - - name: Christy Koh affiliation: 5 + - name: Christy Koh + affiliation: 4 - name: Jonathan NG affiliation: 6 + - name: James Chua + affiliation: 1 + - name: Alexander Wan + affiliation: 4 - name: Reagan Lee - affiliation: 7 - - name: Alex Wan + affiliation: 4 + - name: Ben W. 
+ affiliation: 1 + - name: Kyle O'Brien + affiliation: [1, 6] + - name: Augustas Macijauskas affiliation: 8 + - name: Waree Sethapun + affiliation: 9 + - name: Eric Mungai Kinuthia + affiliation: 1 affiliations: - name: EleutherAI index: 1 - - name: FZI Research Center for Information Technology, Germany + - name: FZI Research Center for Information Technology index: 2 - name: Redwood Research index: 3 + - name: UC Berkeley + index: 4 + - name: NotodAI Research + index: 5 + - name: Microsoft + index: 6 + - name: University of Washington + index: 7 + - name: CAML Lab, University of Cambridge + index: 8 + - name: Princeton University + index: 9 date: 31 July 2023 bibliography: paper.bib From 421954947aafa8fc226745416a1adf5941509cce Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Fri, 11 Aug 2023 11:52:44 +0000 Subject: [PATCH 07/11] update date --- joss/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joss/paper.md b/joss/paper.md index 7d6a90c99..7b3bbb7c4 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -57,7 +57,7 @@ affiliations: index: 8 - name: Princeton University index: 9 -date: 31 July 2023 +date: 11 08 2023 bibliography: paper.bib --- From c3b25a01821650593541310eefceda60b48495d9 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Fri, 11 Aug 2023 11:56:32 +0000 Subject: [PATCH 08/11] update affilations order --- joss/paper.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/joss/paper.md b/joss/paper.md index 7b3bbb7c4..0d9053346 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -11,27 +11,27 @@ authors: affiliation: 1 - name: Walter Laurito corresponding: true - affiliation: [2, 5] + affiliation: "2, 3" - name: Alex Mallen - affiliation: [1, 7] + affiliation: "1, 7" - name: Fabien Roger - affiliation: 3 + affiliation: 4 - name: Kay Kozaronek - affiliation: 5 + affiliation: 2 - name: Christy Koh - affiliation: 4 + affiliation: 5 - name: Jonathan NG - affiliation: 6 + affiliation: 
2 - name: James Chua affiliation: 1 - name: Alexander Wan - affiliation: 4 + affiliation: 5 - name: Reagan Lee - affiliation: 4 + affiliation: 5 - name: Ben W. affiliation: 1 - name: Kyle O'Brien - affiliation: [1, 6] + affiliation: "1, 6" - name: Augustas Macijauskas affiliation: 8 - name: Waree Sethapun @@ -41,13 +41,13 @@ authors: affiliations: - name: EleutherAI index: 1 - - name: FZI Research Center for Information Technology + - name: NotodAI Research index: 2 - - name: Redwood Research + - name: FZI Research Center for Information Technology index: 3 - - name: UC Berkeley + - name: Redwood Research index: 4 - - name: NotodAI Research + - name: UC Berkeley index: 5 - name: Microsoft index: 6 From dbac8852af01fb16f1176a1d3669cdc73c24ca8d Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Fri, 11 Aug 2023 11:57:25 +0000 Subject: [PATCH 09/11] add aknowledgements --- joss/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joss/paper.md b/joss/paper.md index 0d9053346..86453f5ff 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -81,4 +81,4 @@ Language models are proficient at predicting successive tokens in a sequence of For collaboration, discussion, and support, the [Eleuther AI Discord's elk channel](https://discord.com/channels/729741769192767510/1070194752785489991) provides a platform for engaging with others interested in the library or related research projects. # Acknowledgements -We would like to thank [EleutherAI](https://www.eleuther.ai/) and [SERI MATS](https://www.serimats.org/) for supporting our work. 
+We would like to thank [EleutherAI](https://www.eleuther.ai/), [SERI MATS](https://www.serimats.org/), and the [Long-Term Future Fund (LTFF)](https://funds.effectivealtruism.org/funds/far-future) for supporting our work. From 129fb857427c9202f56367c8540443e51fa4eac4 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Fri, 11 Aug 2023 12:15:52 +0000 Subject: [PATCH 10/11] add bib for elk --- joss/paper.bib | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/joss/paper.bib b/joss/paper.bib index 93b035ff8..d191fca27 100644 --- a/joss/paper.bib +++ b/joss/paper.bib @@ -5,3 +5,12 @@ @article{burns year={2022} } +@misc{elk, + author = {Christiano, Paul and Cotra, Ajeya and Xu, Mark}, + title = {Eliciting Latent Knowledge (ELK)}, + howpublished = {\url{https://docs.google.com/document/d/1WwsnJQstPq91_Yh-Ch2XRL8H_EpsnjrC1dwZXR37PC8/}}, + year = {December 2021}, + note = {[Accessed 11-08-2023]}, +} + + From 44b83e8ba439d04e0472534eff8e7f56a2793450 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sat, 9 Dec 2023 19:13:20 +0100 Subject: [PATCH 11/11] change affilation --- joss/paper.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/joss/paper.md b/joss/paper.md index 86453f5ff..203d04f63 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -2,7 +2,7 @@ title: 'elk: A Python package to elicit latent knowledge from LLMs' tags: - python - - machine leaarning + - machine learning - interpretability - ai alignment - honest AI @@ -41,7 +41,7 @@ authors: affiliations: - name: EleutherAI index: 1 - - name: NotodAI Research + - name: Cadenza Labs index: 2 - name: FZI Research Center for Information Technology index: 3