{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"GlotWeb","owner":"cisnlp","isFork":false,"description":"GlotWeb: Web Indexing for Low-Resource Languages -- under construction.","allTopics":["multilingual","dataset","glot","low-resource-languages","news-dataset","awsome-list"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":0,"license":"Creative Commons Zero v1.0 Universal","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-24T13:15:43.931Z"}},{"type":"Public","name":"cisnlp.github.io","owner":"cisnlp","isFork":false,"description":"Homepage of cisnlp","allTopics":[],"primaryLanguage":{"name":"SCSS","color":"#c6538c"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-21T08:17:55.718Z"}},{"type":"Public","name":"analogical_reasoning","owner":"cisnlp","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-13T12:55:24.781Z"}},{"type":"Public","name":"GlotCC","owner":"cisnlp","isFork":false,"description":"GlotCC: An Open Broad-Coverage CommonCrawl Corpus and Pipeline for Minority Languages","allTopics":["crawler","multlingual","corpus-linguistics","glot","language-identification","commoncrawl","common-crawl","glotcc","multilingual-dataset"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":0,"starsCount":9,"forksCount":0,"license":"Creative Commons Zero v1.0 Universal","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,6,0,0,1,0,4,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-12T23:27:42.610Z"}},{"type":"Public","name":"MaskLID","owner":"cisnlp","isFork":false,"description":"MaskLID: Code-Switching Language Identification through Iterative Masking -- ACL 2024","allTopics":["language-identification","code-mixing","code-switching","language-identification-toolkit","code-switch","language-identifier"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":1,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-11T11:58:44.052Z"}},{"type":"Public","name":"GlotScript","owner":"cisnlp","isFork":false,"description":"GlotScript: A Resource and Tool for Low Resource Writing System Identification -- LREC 2024","allTopics":["unicode","script","language-detection","iso15924","alphabet","unicode-characters","writing-systems","writing-system","glot","language-identification","unicodedata","script-detection"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":12,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-07T12:56:24.890Z"}},{"type":"Public","name":"Taxi1500","owner":"cisnlp","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":5,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-31T10:18:33.665Z"}},{"type":"Public","name":"TransMI","owner":"cisnlp","isFork":false,"description":"TransMI: A Framework to Create Strong Baselines from Multilingual Pretrained Language Models for Transliterated Data","allTopics":["transliteration","multilinguality","crosslingual-transfer","tokenizer-modification"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-30T21:19:10.257Z"}},{"type":"Public","name":"TransliCo","owner":"cisnlp","isFork":false,"description":"TransliCo: A Contrastive Learning Framework to Address the Script Barrier in Multilingual Pretrained Language Models","allTopics":["transliteration","multilinguality","crosslingual-transfer","script-barrier"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-23T18:32:09.376Z"}},{"type":"Public","name":"Spatial_Schemas","owner":"cisnlp","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-23T13:25:51.408Z"}},{"type":"Public","name":"GlotLID","owner":"cisnlp","isFork":false,"description":"GlotLID: Language Identification with Support for More Than 2000 Labels -- EMNLP 2023","allTopics":["language-detection","multlingual","language-detector","language-recognition","glot","lid","language-identification","language-classification","language-identification-toolkit","low-resource-languages","language-detection-library","language-identifier","language-detection-lib","langid","low-resource-nlp"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":76,"forksCount":7,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-12T18:02:56.765Z"}},{"type":"Public","name":"XAMPLER","owner":"cisnlp","isFork":false,"description":"XAMPLER: Learning to Retrieve Cross-Lingual In-Context Examples","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-09T06:48:10.178Z"}},{"type":"Public","name":"Glot500","owner":"cisnlp","isFork":false,"description":"Glot500: Scaling Multilingual Corpora and Language Models to 500 Languages -- ACL 2023","allTopics":["multilingual","nlp","natural-language-processing","acl","dataset","glot","xlm","multilingual-models","xlm-r","multilingual-nlp","glot500"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":96,"forksCount":3,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-20T23:47:45.126Z"}},{"type":"Public","name":"GlotSparse","owner":"cisnlp","isFork":false,"description":"GlotSparse: Building Corpora in Under-Resourced Languages","allTopics":["multilingual","dataset","corpus-linguistics","glot","low-resource-languages","news-dataset","awsome-list"],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":"Creative Commons Zero v1.0 Universal","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-18T17:23:33.603Z"}},{"type":"Public","name":"GlotStoryBook","owner":"cisnlp","isFork":false,"description":"Children StoryBooks for 180 langauges.","allTopics":["multilingual","storybook","dataset","glot","low-resource-languages","low-resource-nlp"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":1,"starsCount":3,"forksCount":1,"license":"Creative Commons Zero v1.0 Universal","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-12T18:58:00.093Z"}},{"type":"Public","name":"mPLM-Sim","owner":"cisnlp","isFork":false,"description":"mPLM-Sim: Better Cross-Lingual Similarity and Transfer in Multilingual Pretrained Language Models","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":10,"forksCount":0,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-19T13:28:03.474Z"}},{"type":"Public","name":"ColexificationNet","owner":"cisnlp","isFork":false,"description":"Crosslingual Transfer Learning for Low-Resource Languages Based on Multilingual Colexification Graphs","allTopics":["embeddings","crosslingual-transfer","colexification"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-15T15:29:47.704Z"}},{"type":"Public","name":"ofa","owner":"cisnlp","isFork":false,"description":"A Framework aims to wisely initialize unseen subword embeddings in PLMs for efficient large-scale continued pretraining","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":11,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-26T10:06:01.269Z"}},{"type":"Public","name":"simalign","owner":"cisnlp","isFork":false,"description":"Obtain Word Alignments using Pretrained Language Models (e.g., mBERT)","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":4,"starsCount":344,"forksCount":47,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-07T21:21:45.690Z"}},{"type":"Public","name":"lohoravens-webpage","owner":"cisnlp","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-23T12:09:07.037Z"}},{"type":"Public","name":"parcoure","owner":"cisnlp","isFork":false,"description":"ParCourE - Parallel Corpus Explorer","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":12,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-12-27T20:27:02.266Z"}},{"type":"Public","name":"graph-align","owner":"cisnlp","isFork":false,"description":"code for EMNLP graph align paper","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":9,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-11-05T15:45:40.862Z"}},{"type":"Public","name":"bias-in-nlp","owner":"cisnlp","isFork":false,"description":"Literature overview: gender bias in natural language processing","allTopics":["nlp","bias","nlp-machine-learning","gender-bias"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":10,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-01-26T09:59:47.456Z"}},{"type":"Public","name":"semi-markov-crf","owner":"cisnlp","isFork":false,"description":"Code for paper \"Neural Semi-Markov Conditional Random Fields for Robust Character-Based Part-of-Speech Tagging\"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":17,"forksCount":4,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2019-05-31T18:22:16.216Z"}}],"repositoryCount":24,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}