@prefix rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix sl:    <http://www.semanlink.net/2001/00/semanlink-schema#> .
@prefix skos:  <http://www.w3.org/2004/02/skos/core#> .
@prefix rdfs:  <http://www.w3.org/2000/01/rdf-schema#> .
@prefix tag:   <http://www.semanlink.net/tag/> .
@prefix foaf:  <http://xmlns.com/foaf/0.1/> .
@prefix dc:    <http://purl.org/dc/elements/1.1/> .

tag:multilingual_language_models
        a               sl:Tag ;
        skos:broader    tag:multilingual_nlp ;
        skos:prefLabel  "Multilingual Language Models" .

<http://www.semanlink.net/doc/2020/08/why_you_should_do_nlp_beyond_en>
        dc:title         "Why You Should Do NLP Beyond English" ;
        sl:comment       "> Only a few hundred languages\r\nare represented on the web and speakers of minority languages are severely\r\nlimited in the information available to them." ;
        sl:creationDate  "2020-08-01" ;
        sl:tag           tag:sebastian_ruder , tag:multilingual_nlp , tag:low_resource_languages .

tag:nlp_meta  a         sl:Tag ;
        skos:prefLabel  "NLP@Meta" .

tag:nlp_google  a       sl:Tag ;
        skos:prefLabel  "NLP@Google" .

<http://www.semanlink.net/doc/2022/06/domain_transfer_with_ggpl_germ>
        dc:title         "Domain transfer with GGPL: German Generative Pseudo Labeling 🥨 | by Matthias Richter | Jun, 2022 | ML6team" ;
        sl:creationDate  "2022-06-02" ;
        sl:tag           tag:multilingual_nlp , tag:gpl_generative_pseudo_labeling .

tag:nlp  a              sl:Tag ;
        skos:prefLabel  "NLP" .

tag:document_processing
        a               sl:Tag ;
        skos:prefLabel  "Document AI" .

tag:twitter_thread  a   sl:Tag ;
        skos:prefLabel  "Twitter thread" .

tag:colbert  a          sl:Tag ;
        skos:prefLabel  "ColBERT" .

<http://www.semanlink.net/doc/2022/04/2110_08151_mluke_the_power_o>
        dc:title         "[2110.08151] mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models" ;
        sl:comment       "[Ikuya Yamada sur Twitter : \"Is entity representation effective to improve multilingual language models?...\"](doc:2022/04/ikuya_yamada_sur_twitter_is_)\r\n\r\n> Recent studies have shown that multilingual pretrained language models can be effectively improved with cross-lingual alignment information from Wikipedia entities. However, **existing methods only exploit entity information in pretraining and do not explicitly use entities in downstream tasks**. In this study, we explore the **effectiveness of leveraging entity representations for downstream cross-lingual tasks**.\r\n>\r\n> the key insight is that incorporating entity representations into the input allows us to extract more language-agnostic features. \r\n\r\n[Github](https://github.com/studio-ousia/luke)\r\n\r\n> Entity representations are known to enhance\r\nlanguage models in mono-lingual settings\r\n(Zhang et al., 2019: [ERNIE](tag:ernie.html); Peters et al., 2019:  [[1909.04164] Knowledge Enhanced Contextual Word Representations](doc:2020/05/1909_04164_knowledge_enhanced); Wang et al.,\r\n2021 [[1911.06136] KEPLER: A Unified Model for Knowledge Embedding and Pre-trained Language Representation](doc:2020/11/1911_06136_kepler_a_unified_); Xiong et al., 2020; Yamada et al., 2020: [[2010.01057] LUKE: Deep Contextualized Entity Representations with Entity-aware Self-attention](doc:2020/11/2010_01057_luke_deep_context))\r\npresumably by introducing real-world knowledge.\r\nWe show that using entity representations facilitates\r\ncross-lingual transfer by providing language-independent\r\nfeatures.\r\n>\r\n> Multilingual extension of LUKE. The model is trained with the multilingual\r\nmasked language modeling (MLM) task as well\r\nas the masked entity prediction (MEP) task with\r\nWikipedia entity embeddings\r\n\r\n> We investigate two ways of using the entity representations\r\nin cross-lingual transfer tasks:\r\n> 1. 
perform\r\nentity linking for the input text, and append\r\nthe detected entity tokens to the input sequence.\r\nThe entity tokens are expected to provide language-independent\r\nfeatures to the model\r\n> 2. use the entity\r\n[MASK] token from the MEP task as a language-independent\r\nfeature extractor." ;
        sl:creationDate  "2022-04-17" ;
        sl:tag           tag:multilingual_nlp , tag:masked_entity_prediction_task , tag:luke , tag:ikuya_yamada , tag:entities_and_lm , tag:arxiv_doc .

<http://www.semanlink.net/doc/2024/10/meta_ai_research_topic_no_lan>
        dc:title         "Meta AI Research Topic - No Language Left Behind" ;
        sl:creationDate  "2024-10-07" ;
        sl:tag           tag:no_language_left_behind , tag:nlp_meta , tag:multilingual_nlp , tag:low_resource_languages .

tag:omar_khattab  a     sl:Tag ;
        skos:prefLabel  "Omar Khattab" .

tag:multilingual_ai  a  sl:Tag ;
        skos:prefLabel  "Multilingual AI" .

tag:masked_entity_prediction_task
        a               sl:Tag ;
        skos:prefLabel  "Masked entity prediction" .

<http://www.semanlink.net/doc/2024/01/rachit_bansal_sur_x_extendin>
        dc:title         "Rachit Bansal sur X : \"An LLM can be efficiently *composed* with specialized (L)LMs to enable new tasks\"" ;
        sl:comment       "[[2401.02412] LLM Augmented LLMs: Expanding Capabilities through Composition](doc:2024/01/2401_02412_llm_augmented_llms)\r\n\r\n> CALM—Composition to Augment Language Models:\r\n> 1. Scales up LLMs on new tasks by *re-using* existing (L)LMs w/ very few new parameters & data,\r\n> 2. Keeps existing model weights intact, hence **preserves original capabilities**,\r\n> 3. Applies to diverse domains and settings.\r\n\r\n> Rather than a shallow combination, CALM introduces a small set of cross-attention parameters over models’ layer representations.\r\n\r\nUse-case example, Multilinguality:\r\n\r\n> We reuse an LM trained on a bunch of low-resource languages (LRLs)\r\nw/ an LLM that has never seen some of these LRLs.\r\n" ;
        sl:creationDate  "2024-01-06" ;
        sl:tag           tag:twitter_thread , tag:nlp_google , tag:multilingual_nlp , tag:llm_composition , tag:avoiding_catastrophic_forgetting .

tag:ikuya_yamada  a     sl:Tag ;
        skos:prefLabel  "Ikuya Yamada" .

tag:low_resource_languages
        a               sl:Tag ;
        skos:prefLabel  "Low-Resource Languages" .

tag:sentence_embeddings
        a               sl:Tag ;
        skos:prefLabel  "Sentence Embeddings" .

tag:llm_composition  a  sl:Tag ;
        skos:prefLabel  "LLM Composition" .

tag:arxiv_doc  a        sl:Tag ;
        skos:prefLabel  "Arxiv Doc" .

tag:cross_lingual_nlp
        a               sl:Tag ;
        skos:broader    tag:multilingual_nlp ;
        skos:prefLabel  "Cross-lingual NLP" .

tag:avoiding_catastrophic_forgetting
        a               sl:Tag ;
        skos:prefLabel  "Avoiding Catastrophic Forgetting" .

tag:layoutlm  a         sl:Tag ;
        skos:prefLabel  "LayoutLM" .

tag:multilingual_nlp  a   sl:Tag ;
        rdfs:isDefinedBy  tag:multilingual_nlp.n3 ;
        skos:broader      tag:nlp , tag:langues ;
        skos:prefLabel    "Multilingual NLP" ;
        skos:related      tag:multilingual_ai , tag:low_resource_languages ;
        foaf:page         tag:multilingual_nlp.html .

tag:pre_trained_language_models
        a               sl:Tag ;
        skos:prefLabel  "Pre-Trained Language Models" .

<http://www.semanlink.net/doc/2024/02/omar_khattab_sur_x_imo_one_o>
        dc:title         "Omar Khattab sur X : \"ColBERT in 81 languages by generalizing from English training! ...\"" ;
        sl:creationDate  "2024-02-28" ;
        sl:tag           tag:tweet , tag:omar_khattab , tag:multilingual_nlp , tag:colbert .

tag:entities_and_lm  a  sl:Tag ;
        skos:prefLabel  "Entities and LM" .

<http://www.semanlink.net/doc/2022/11/the_state_of_multilingual_ai>
        dc:title         "The State of Multilingual AI" ;
        sl:creationDate  "2022-11-14" ;
        sl:tag           tag:sebastian_ruder , tag:multilingual_nlp , tag:multilingual_ai .

tag:no_language_left_behind
        a               sl:Tag ;
        skos:broader    tag:multilingual_nlp ;
        skos:prefLabel  "No Language Left Behind" .

tag:langues  a          sl:Tag ;
        skos:prefLabel  "Langues" .

tag:nils_reimers  a     sl:Tag ;
        skos:prefLabel  "Nils Reimers" .

<http://www.semanlink.net/doc/2022/11/document_ai_lilt_a_better_lang>
        dc:title         "Document AI: LiLT a better language agnostic LayoutLM model" ;
        sl:creationDate  "2022-11-22" ;
        sl:tag           tag:multilingual_nlp , tag:layoutlm , tag:document_processing .

<http://www.semanlink.net/doc/2022/03/2004_09813_making_monolingual>
        dc:title         "[2004.09813] Making Monolingual Sentence Embeddings Multilingual using Knowledge Distillation" ;
        sl:creationDate  "2022-03-18" ;
        sl:tag           tag:sentence_embeddings , tag:nils_reimers , tag:multilingual_nlp , tag:knowledge_distillation , tag:arxiv_doc .

tag:luke  a             sl:Tag ;
        skos:prefLabel  "LUKE" .

<http://www.semanlink.net/doc/2023/05/2305_11778_cross_lingual_supe>
        dc:title         "[2305.11778] Cross-Lingual Supervision improves Large Language Models Pre-training" ;
        sl:comment       "> We demonstrate that pre-training Large Language Models on a mixture of a self-supervised Language Modeling objective and the supervised Machine Translation objective, therefore including cross-lingual parallel data during pre-training, yields models with better in-context learning abilities." ;
        sl:creationDate  "2023-05-22" ;
        sl:tag           tag:pre_trained_language_models , tag:nlp_google , tag:multilingual_nlp , tag:machine_translation , tag:arxiv_doc .

tag:gpl_generative_pseudo_labeling
        a               sl:Tag ;
        skos:prefLabel  "GPL (Generative Pseudo Labeling)" .

tag:machine_translation
        a               sl:Tag ;
        skos:prefLabel  "Machine translation" .

tag:multilingual_search
        a               sl:Tag ;
        skos:broader    tag:multilingual_nlp ;
        skos:prefLabel  "Multilingual search" .

tag:knowledge_distillation
        a               sl:Tag ;
        skos:prefLabel  "Knowledge distillation" .

tag:sebastian_ruder  a  sl:Tag ;
        skos:prefLabel  "Sebastian Ruder" .

tag:tweet  a            sl:Tag ;
        skos:prefLabel  "Tweet" .
