-
Notifications
You must be signed in to change notification settings - Fork 17
/
welcome.tsx
146 lines (139 loc) · 9.21 KB
/
welcome.tsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import React from "react"
import { useState, FormEvent } from "react"
import { useNavigate } from "react-router-dom"
import { Feature } from "./types"
import FeatureSelect from "./components/featureSelect"
import { pathForFeature, DEFAULT_AUTOENCODER, AUTOENCODER_FAMILIES } from "./autoencoder_registry"
/**
 * Multiplier used to write the GPT-4 atom ids below as
 * `shard * GPT4_ATOMS_PER_SHARD + offset`.
 */
const GPT4_ATOMS_PER_SHARD = 1024

/**
 * One row of the "Interesting features" list: either a section heading or a
 * clickable feature. The `kind` tag lets the renderer narrow safely instead of
 * relying on a nullable `feature` field.
 */
type DisplayEntry =
  | { kind: "heading"; heading: string; headingType: "h3" | "h4" }
  | { kind: "feature"; feature: Feature; label: string; description: string }

/** Creates a section-heading row rendered with the given heading tag. */
const headingEntry = (heading: string, headingType: "h3" | "h4"): DisplayEntry => ({
  kind: "heading",
  heading,
  headingType,
})

/**
 * Creates a feature row backed by the 'v5_latelayer_postmlp' autoencoder family
 * (listed under the GPT-4 headings).
 *
 * @param shard - multiplied by GPT4_ATOMS_PER_SHARD to form the atom id
 * @param offset - added to `shard * GPT4_ATOMS_PER_SHARD` to form the atom id
 * @param label - button text
 * @param description - button tooltip (`title` attribute)
 */
const gpt4Entry = (shard: number, offset: number, label: string, description: string): DisplayEntry => ({
  kind: "feature",
  feature: {
    atom: shard * GPT4_ATOMS_PER_SHARD + offset,
    autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({}),
  },
  label,
  description,
})

/**
 * Creates a feature row backed by the 'v5_l8_postmlp' autoencoder family
 * (listed under the GPT-2 small and safety headings).
 *
 * @param atom - atom id within the selected autoencoder
 * @param numFeatures - value passed to `get_ae` as `num_features`
 * @param numActiveFeatures - value passed to `get_ae` as `num_active_features`
 * @param label - button text
 * @param description - button tooltip (`title` attribute)
 */
const gpt2Entry = (
  atom: number,
  numFeatures: string,
  numActiveFeatures: string,
  label: string,
  description: string,
): DisplayEntry => ({
  kind: "feature",
  feature: {
    atom,
    autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({
      num_features: numFeatures,
      num_active_features: numActiveFeatures,
    }),
  },
  label,
  description,
})

/**
 * Builds the curated list shown under "Interesting features", in display order.
 * Kept as a function (rather than a module-level constant) so the autoencoder
 * registry is only consulted when the component actually renders.
 */
function buildDisplayFeatures(): DisplayEntry[] {
  return [
    // well explained + interesting
    headingEntry('GPT-4', 'h4'),
    gpt4Entry(62, 53, "humans have flaws", "descriptions of how humans are flawed"),
    gpt4Entry(25, 8, "police reports, especially child safety", "safety incidents especially related to children"),
    gpt4Entry(9, 44, "price changes", "ends of phrases describing commodity/equity price changes"),
    gpt4Entry(17, 33, "ratification (multilingual)", "ratification (multilingual)"),
    gpt4Entry(3, 421, "would [...]", "conditionals (things that would be true)"),
    gpt4Entry(63, 8, "identification documents (multilingual)", "identification documents (multilingual)"),
    gpt4Entry(0, 14, "lightly incremented timestamps", "timestamps being lightly incremented with recurring formats"),

    headingEntry('Technical knowledge', 'h3'),
    gpt4Entry(40, 42, "machine learning training logs", "machine learning training logs"),
    gpt4Entry(12, 47, "onclick/onchange = function(this)", "onclick/onchange = function(this)"),
    gpt4Entry(54, 23, "edges (graph theory) and related concepts", "edges (graph theory) and related concepts"),
    gpt4Entry(56, 12, "algebraic rings", "algebraic rings"),
    gpt4Entry(28, 47, "adenosine/dopamine receptors", "adenosine/dopamine receptors"),
    gpt4Entry(2, 601, "blockchain vibes", "blockchain vibes"),

    headingEntry('GPT-2 small', 'h4'),
    gpt2Entry(488432, '2097152', '8', "rhetorical questions", "rhetorical questions"),
    gpt2Entry(2088200, '2097152', '8', "counting human casualties", "counting human casualties"),
    gpt2Entry(1621560, '2097152', '8', "X and Y phrases", "X and -> Y"),
    gpt2Entry(733, '32768', '8', "Patrick/Patty surname predictor", "Predicts surnames after Patrick"),
    gpt2Entry(64464, '131072', '32', "things that are unknown", "things that are unknown"),
    gpt2Entry(56907, '131072', '32', "words in quotes", "predicts words in quotes"), // similar to 33248
    gpt2Entry(1605835, '2097152', '8', "these/those responsible things", "these/those, in a phrase where something is responsible for something"),
    gpt2Entry(8040, '8192', '32', "2018 natural disasters", "2018 natural disasters"),
    gpt2Entry(21464, '131072', '32', "addition in code", "addition in code"),
    gpt2Entry(66232, '131072', '32', "function application", "function application"),
    // NOTE(review): same atom and autoencoder as "things that are unknown" above — confirm this is intentional.
    gpt2Entry(64464, '131072', '32', "unclear/hidden things", "unclear/hidden things (top only)"),
    gpt2Entry(10423, '131072', '32', "what the ...", "[who/what/when/where/why/how] the"),

    headingEntry('Safety relevant features (found via attribution methods)', 'h3'),
    gpt2Entry(64840, '131072', '32', "profanity (1)", "activates in order to output profanity"),
    gpt2Entry(104813, '131072', '32', "profanity (2)", "activates on profanity"),
    gpt2Entry(101090, '131072', '32', "profanity (3)", "activates on 'fucking' (profane, not sexual contexts)"),
    gpt2Entry(72185, '131072', '32', "erotic content", "erotic content"),
    gpt2Entry(69134, '131072', '32', "[content warning] sexual abuse", "sexual abuse"),
    // gpt2Entry(2, '2097152', '8', "things being brought", "bring * -> together/back"),
  ]
}

/**
 * Landing page of the viewer.
 *
 * Renders a `FeatureSelect` for choosing an arbitrary feature, followed by a
 * curated list of features grouped under headings. Submitting the selector or
 * clicking a curated feature navigates to `pathForFeature(feature)`.
 */
export default function Welcome() {
  const navigate = useNavigate()

  // The curated list never changes, so build it once per mount instead of
  // re-running every get_ae(...) lookup on each state update.
  const displayFeatures = React.useMemo(buildDisplayFeatures, [])

  // Feature currently chosen in the selector; starts at atom 0 of the default autoencoder.
  const [feature, setFeature] = useState<Feature>({
    atom: 0, autoencoder: DEFAULT_AUTOENCODER
  })

  // Shared by the selector's submit action and the curated feature buttons.
  const handleClick = (click_feature: Feature) => {
    navigate(pathForFeature(click_feature))
  }

  return (
    <div className="flex flex-col" style={{'padding': '100px'}}>
      <h1 className="text-2xl font-bold mb-4">Welcome! This is a viewer for sparse autoencoders features trained in <a href="https://cdn.openai.com/papers/sparse-autoencoders.pdf">this paper</a> </h1>
      <h1>Pick a feature:</h1>
      <FeatureSelect
        init_feature={feature}
        onFeatureChange={(f: Feature) => setFeature(f)}
        onFeatureSubmit={(f: Feature) => handleClick(f)}
        show_go={true}
      />
      <div className="mt-4">
        <h2 className="text-xl font-bold mb-2">Interesting features:</h2>
        <div className="mb-10 flex-row">
          <div
            className="flex flex-flow flex-wrap"
          >
            {displayFeatures.map((entry, index) => {
              // The list is static, so the position is a stable React key.
              if (entry.kind === "heading") {
                return (
                  <div style={{width: '100%'}} key={index}>
                    {React.createElement(entry.headingType, {}, entry.heading)}
                  </div>
                )
              }
              // Narrowed to a feature row here, so `feature` is never null.
              const { feature: entryFeature, label, description } = entry
              return (
                <button
                  key={index}
                  onClick={() => handleClick(entryFeature)}
                  style={{ width: 200 }}
                  className="text-blue-500 hover:text-blue-700"
                  title={description}
                >
                  {label}
                </button>
              )
            })}
          </div>
        </div>
      </div>
    </div>
  )
}