{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":690106318,"defaultBranch":"main","name":"nanotron","ownerLogin":"huggingface","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-09-11T14:40:28.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/25720743?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1719315740.0","currentOid":""},"activityList":{"items":[{"before":"efa28b554e934cc3efedda39ce0307aa9219b56b","after":"93c62cda15c2e9eef7a61f5e3111ea0eb09439fe","ref":"refs/heads/bench_cluster","pushedAt":"2024-06-25T16:53:49.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"rename profiler folder output","shortMessageHtmlLink":"rename profiler folder output"}},{"before":"d5bf8e3a66806b589f7ebd4c247ad4cfd595dde9","after":"f33e8181632671838a2a281deae8a2caf3c4fdb1","ref":"refs/heads/refacto-generate-3","pushedAt":"2024-06-25T12:03:03.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"refacto generate + use simpler rotary for inference","shortMessageHtmlLink":"refacto generate + use simpler rotary for inference"}},{"before":"57080d908320015dad76c7ca2a311d68cb8c4a7b","after":"d5bf8e3a66806b589f7ebd4c247ad4cfd595dde9","ref":"refs/heads/refacto-generate-3","pushedAt":"2024-06-25T12:02:48.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"Fix pipeline parallel rank calculation in ParallelContext","shortMessageHtmlLink":"Fix pipeline parallel rank calculation in ParallelContext"}},{"before":"c50b4d0903b7c9b7f3b2b667a7fc93b6ff3e0a99","after":"57080d908320015dad76c7ca2a311d68cb8c4a7b","ref":"refs/heads/refacto-generate-3","pushedAt":"2024-06-25T11:53:36.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"refacto generate + use simpler rotary for inference","shortMessageHtmlLink":"refacto generate + use simpler rotary for inference"}},{"before":"9418fd15ec42f367421048254bd89b7fd7ac72e4","after":"c50b4d0903b7c9b7f3b2b667a7fc93b6ff3e0a99","ref":"refs/heads/refacto-generate-3","pushedAt":"2024-06-25T11:52:56.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"Update LlamaConfig with rope_interleaved flag","shortMessageHtmlLink":"Update LlamaConfig with rope_interleaved flag"}},{"before":"81a7f16771ef1a95cb65e5c6ff4430afa4df5728","after":"9418fd15ec42f367421048254bd89b7fd7ac72e4","ref":"refs/heads/refacto-generate-3","pushedAt":"2024-06-25T11:50:01.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"refacto generate","shortMessageHtmlLink":"refacto generate"}},{"before":null,"after":"81a7f16771ef1a95cb65e5c6ff4430afa4df5728","ref":"refs/heads/refacto-generate-3","pushedAt":"2024-06-25T11:42:20.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"refacto generate","shortMessageHtmlLink":"refacto generate"}},{"before":"a6b8b5eca6edd79e42356f23b46f0004f123dad8","after":"15fd9dd109e0fb23958d50ea88b723a0db093c72","ref":"refs/heads/xrsrke/infini_attention_this_actually_works","pushedAt":"2024-06-25T11:17:12.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xrsrke","name":"XλRI-U5","path":"/xrsrke","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22252984?s=80&v=4"},"commit":{"message":"add l2 nor, kurtosis, abmax, weight update magnitude","shortMessageHtmlLink":"add l2 nor, kurtosis, abmax, weight update magnitude"}},{"before":"dcbe86e83d99749ec0d6d49209c1d66347d71345","after":"a6b8b5eca6edd79e42356f23b46f0004f123dad8","ref":"refs/heads/xrsrke/infini_attention_this_actually_works","pushedAt":"2024-06-25T06:38:43.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xrsrke","name":"XλRI-U5","path":"/xrsrke","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22252984?s=80&v=4"},"commit":{"message":"add debugging","shortMessageHtmlLink":"add debugging"}},{"before":"1f5a1dd8428c41942cf63f994e3ce6075abe2d0d","after":"efa28b554e934cc3efedda39ce0307aa9219b56b","ref":"refs/heads/bench_cluster","pushedAt":"2024-06-24T10:03:52.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"only create one profiler trace","shortMessageHtmlLink":"only create one profiler trace"}},{"before":"72ccd4c580f89925b33c392b3961d4475dc9f2e1","after":"1f5a1dd8428c41942cf63f994e3ce6075abe2d0d","ref":"refs/heads/bench_cluster","pushedAt":"2024-06-24T09:26:32.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"fix recording trace","shortMessageHtmlLink":"fix recording trace"}},{"before":"b49c261d5dbab4c8b2b374b127e490b585f0c1c3","after":"72ccd4c580f89925b33c392b3961d4475dc9f2e1","ref":"refs/heads/bench_cluster","pushedAt":"2024-06-24T08:49:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"small changes for profiler","shortMessageHtmlLink":"small changes for profiler"}},{"before":null,"after":"b49c261d5dbab4c8b2b374b127e490b585f0c1c3","ref":"refs/heads/bench_cluster","pushedAt":"2024-06-20T16:00:40.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"log memory usage at each steps","shortMessageHtmlLink":"log memory usage at each steps"}},{"before":"838cce606351a7cb211227b9590208d585cbf88d","after":"dcbe86e83d99749ec0d6d49209c1d66347d71345","ref":"refs/heads/xrsrke/infini_attention_this_actually_works","pushedAt":"2024-06-20T07:29:22.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"xrsrke","name":"XλRI-U5","path":"/xrsrke","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22252984?s=80&v=4"},"commit":{"message":"refactor for generating finetuning data and run evals","shortMessageHtmlLink":"refactor for generating finetuning data and run evals"}},{"before":"c66b4ba6b05356ba20ef990432c2fcfe877a9d69","after":"1c17bbbf9ddb3eec2bb8ec0b44b7eeb568e2f88a","ref":"refs/heads/refacto-generate","pushedAt":"2024-06-18T18:07:28.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"add pipeline stage attribute","shortMessageHtmlLink":"add pipeline stage attribute"}},{"before":"e9437753d76dcadd0d346cd54a8cd3189fa92403","after":"ebd96c0e3f81cb107871dcbe02e54467768b855c","ref":"refs/heads/xrsrke/fp8-end-to-end","pushedAt":"2024-06-18T10:54:59.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xrsrke","name":"XλRI-U5","path":"/xrsrke","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22252984?s=80&v=4"},"commit":{"message":"clean up","shortMessageHtmlLink":"clean up"}},{"before":"be2da504e50edd64118c97acbaa3d027d3c58ce5","after":"c66b4ba6b05356ba20ef990432c2fcfe877a9d69","ref":"refs/heads/refacto-generate","pushedAt":"2024-06-17T19:33:22.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"clean generate","shortMessageHtmlLink":"clean generate"}},{"before":"bf6846eed76c52fcb8af23c820099d76eb38d043","after":"be2da504e50edd64118c97acbaa3d027d3c58ce5","ref":"refs/heads/refacto-generate","pushedAt":"2024-06-17T16:06:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"make use_cache work with multi parallelism","shortMessageHtmlLink":"make use_cache work with multi parallelism"}},{"before":"76ac8ca2c0c3fa6cac78c422358fcfa11fdd8b76","after":"bf6846eed76c52fcb8af23c820099d76eb38d043","ref":"refs/heads/refacto-generate","pushedAt":"2024-06-17T15:37:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"refacto use_cache to unify with no_cache","shortMessageHtmlLink":"refacto use_cache to unify with no_cache"}},{"before":"1503d9e58a37eabc9cdddc42f53d3417e2224714","after":"76ac8ca2c0c3fa6cac78c422358fcfa11fdd8b76","ref":"refs/heads/refacto-generate","pushedAt":"2024-06-17T15:27:06.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"add changes to support cache","shortMessageHtmlLink":"add changes to support cache"}},{"before":"c0c74aaa890a776ce4d0182d9b89544520fe41d1","after":"1503d9e58a37eabc9cdddc42f53d3417e2224714","ref":"refs/heads/refacto-generate","pushedAt":"2024-06-17T13:31:05.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"fix bug with multiple process group + add sampler","shortMessageHtmlLink":"fix bug with multiple process group + add sampler"}},{"before":"074456b03fc8f012f95a8fac95934688f2801dee","after":"e9437753d76dcadd0d346cd54a8cd3189fa92403","ref":"refs/heads/xrsrke/fp8-end-to-end","pushedAt":"2024-06-17T11:24:13.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xrsrke","name":"XλRI-U5","path":"/xrsrke","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22252984?s=80&v=4"},"commit":{"message":"refactor tests, add some fp8 optimizer state tests","shortMessageHtmlLink":"refactor tests, add some fp8 optimizer state tests"}},{"before":"31c12e86f83052a85caedcf978ae6373ede43cda","after":"c0c74aaa890a776ce4d0182d9b89544520fe41d1","ref":"refs/heads/refacto-generate","pushedAt":"2024-06-17T10:28:58.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"generate refacto is now working","shortMessageHtmlLink":"generate refacto is now working"}},{"before":"73fbb12acbc0a62e3752ccfb427f93c5279844ff","after":"074456b03fc8f012f95a8fac95934688f2801dee","ref":"refs/heads/xrsrke/fp8-end-to-end","pushedAt":"2024-06-15T08:15:52.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xrsrke","name":"XλRI-U5","path":"/xrsrke","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22252984?s=80&v=4"},"commit":{"message":"fix loss stops at 8 due to constant learning rate","shortMessageHtmlLink":"fix loss stops at 8 due to constant learning rate"}},{"before":"b2a0af50e6d9fe1125f9a868477466db53d5df62","after":"31c12e86f83052a85caedcf978ae6373ede43cda","ref":"refs/heads/refacto-generate","pushedAt":"2024-06-14T09:46:33.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"add LlamaRotary because generation is not good otherwise","shortMessageHtmlLink":"add LlamaRotary because generation is not good otherwise"}},{"before":"63b6427501d87279257ec0d23ed1573f3aa12f1c","after":"b2a0af50e6d9fe1125f9a868477466db53d5df62","ref":"refs/heads/refacto-generate","pushedAt":"2024-06-14T09:46:08.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"rollback to flash_attn_func instead of flash_attn_varlen_func in forward","shortMessageHtmlLink":"rollback to flash_attn_func instead of flash_attn_varlen_func in forward"}},{"before":"03b83fc825418ac4f22d2fcea41514cdba6cedb9","after":"84f141cdcbc4450139334e75117ce7e9fa80d93f","ref":"refs/heads/xrsrke/llama3_ref_for_infiniattn","pushedAt":"2024-06-14T04:07:58.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"xrsrke","name":"XλRI-U5","path":"/xrsrke","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22252984?s=80&v=4"},"commit":{"message":"fix theta and interleaved rope","shortMessageHtmlLink":"fix theta and interleaved rope"}},{"before":null,"after":"03b83fc825418ac4f22d2fcea41514cdba6cedb9","ref":"refs/heads/xrsrke/llama3_ref_for_infiniattn","pushedAt":"2024-06-14T03:59:30.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"xrsrke","name":"XλRI-U5","path":"/xrsrke","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22252984?s=80&v=4"},"commit":{"message":"reference for infini attention","shortMessageHtmlLink":"reference for infini attention"}},{"before":null,"after":"03b83fc825418ac4f22d2fcea41514cdba6cedb9","ref":"refs/heads/xrsrke/ref_for_infini_attn","pushedAt":"2024-06-14T03:58:50.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"xrsrke","name":"XλRI-U5","path":"/xrsrke","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/22252984?s=80&v=4"},"commit":{"message":"reference for infini attention","shortMessageHtmlLink":"reference for infini attention"}},{"before":"3e169c5afc80b3494c1065bd4ef3079dc2b657de","after":"63b6427501d87279257ec0d23ed1573f3aa12f1c","ref":"refs/heads/refacto-generate","pushedAt":"2024-06-13T16:20:02.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"3outeille","name":"Ferdinand Mom","path":"/3outeille","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/47445085?s=80&v=4"},"commit":{"message":"add LlamaRotary because generation is not good otherwise","shortMessageHtmlLink":"add LlamaRotary because generation is not good otherwise"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEbt6gNgA","startCursor":null,"endCursor":null}},"title":"Activity · huggingface/nanotron"}