@prefix rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix sl:    <http://www.semanlink.net/2001/00/semanlink-schema#> .
@prefix skos:  <http://www.w3.org/2004/02/skos/core#> .
@prefix rdfs:  <http://www.w3.org/2000/01/rdf-schema#> .
@prefix tag:   <http://www.semanlink.net/tag/> .
@prefix foaf:  <http://xmlns.com/foaf/0.1/> .
@prefix dc:    <http://purl.org/dc/elements/1.1/> .

tag:neural_models_for_information_retrieval
        a                 sl:Tag ;
        rdfs:isDefinedBy  tag:neural_models_for_information_retrieval.n3 ;
        skos:broader      tag:nlp_based_ir , tag:artificial_neural_network , tag:information_retrieval ;
        skos:prefLabel    "Neural Search" ;
        skos:related      tag:colbert , tag:retriever , tag:open_domain_question_answering , tag:knowledge_augmented_language_models , tag:cognitive_search , tag:retrieval_augmented_lm , tag:vector_retrieval , tag:vector_database ;
        foaf:page         tag:neural_models_for_information_retrieval.html .

tag:j_y_etais  a        sl:Tag ;
        skos:prefLabel  "J'y étais" .

tag:gautier_izacard  a  sl:Tag ;
        skos:prefLabel  "Gautier Izacard" .

tag:txtai  a            sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "txtai" .

tag:nlp_tools  a        sl:Tag ;
        skos:prefLabel  "NLP tools" .

tag:artificial_neural_network
        a               sl:Tag ;
        skos:prefLabel  "Neural networks" .

tag:nlp_long_documents
        a               sl:Tag ;
        skos:prefLabel  "Long documents" .

<http://www.semanlink.net/doc/2021/07/a_moderate_proposal_for_radical>
        dc:title         "A Moderate Proposal for Radically Better AI-powered Web Search" ;
        sl:creationDate  "2021-07-10" ;
        sl:tag           tag:web_search , tag:nlp_stanford , tag:neural_models_for_information_retrieval , tag:colbert .

<http://www.semanlink.net/doc/2022/08/karl_higley_sur_twitter_many>
        dc:title         "Karl Higley sur Twitter : \"Many ANN search tools (e.g. FAISS, ScaNN) allow you to provide multiple points as part of the same query...\"" ;
        sl:comment       " > Puzzled why more retrieval models don’t take advantage of this. Give me 100 neighbors of ten points, not 1000 neighbors of one point! (Then score and order them.)\r\n" ;
        sl:creationDate  "2022-08-20" ;
        sl:tag           tag:tweet , tag:retriever_reader , tag:neural_models_for_information_retrieval , tag:faiss .

tag:neural_ranking_models
        a               sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Neural Ranking Models" .

tag:allen_institute_for_ai_a2i
        a               sl:Tag ;
        skos:prefLabel  "Allen Institute for AI (AI2)" .

<http://www.semanlink.net/doc/2021/07/2104_08663_beir_a_heterogeno>
        dc:title         "[2104.08663] BEIR: A Heterogenous Benchmark for Zero-shot Evaluation of Information Retrieval Models" ;
        sl:comment       "[GitHub](doc:2021/07/ukplab_beir_a_heterogeneous_be)\r\n\r\n> Our results show **BM25 is a robust baseline**\r\nand **Reranking-based models overall achieve\r\nthe best zero-shot performances**, however, at\r\nhigh computational costs. In contrast, **Dense retrieval\r\nmodels are computationally more efficient\r\nbut often underperform other approaches**\r\n\r\n17 English evaluation datasets, 9 heterogeneous tasks (Non-English left for future work)" ;
        sl:creationDate  "2021-07-09" ;
        sl:tag           tag:arxiv_doc , tag:nils_reimers , tag:benchmark , tag:beir , tag:okapi_bm25 , tag:neural_models_for_information_retrieval , tag:nlp_datasets , tag:zero_shot , tag:information_retrieval .

tag:domain_adaptation_nlp
        a               sl:Tag ;
        skos:prefLabel  "Domain adaptation (NLP)" .

tag:splade  a           sl:Tag ;
        skos:prefLabel  "SPLADE" .

tag:nlp_google  a       sl:Tag ;
        skos:prefLabel  "NLP@Google" .

<http://www.semanlink.net/doc/2022/01/raphaelsty_cherche_neural_sear>
        dc:title         "raphaelsty/cherche: Neural search" ;
        sl:comment       "> Cherche (search in French) allows you to create a neural search pipeline using retrievers and pre-trained language models as rankers. Cherche is meant to be used with small to medium sized corpora." ;
        sl:creationDate  "2022-01-11" ;
        sl:tag           tag:raphaelsty , tag:neural_models_for_information_retrieval , tag:github_project , tag:cherche_raphael .

<http://www.semanlink.net/doc/2023/01/cohere_sur_twitter_with_neur>
        dc:title         "cohere sur Twitter : \"With neural search, you can finally have a powerful search function for internal data that works just as well as external search engines such as Google. Making it especially useful for searching internal company documents...\"" ;
        sl:creationDate  "2023-01-23" ;
        sl:tag           tag:tweet , tag:nils_reimers , tag:neural_models_for_information_retrieval , tag:cohere .

tag:extractive_search
        a               sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Extractive Search" .

tag:discussed_with_ns
        a               sl:Tag ;
        skos:prefLabel  "Discussed with NS" .

tag:embeddings_in_ir  a  sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Embeddings in Information Retrieval" .

<http://www.semanlink.net/doc/2023/08/2002_06275_twinbert_distilli>
        dc:title         "[2002.06275] TwinBERT: Distilling Knowledge to Twin-Structured BERT Models for Efficient Retrieval" ;
        sl:creationDate  "2023-08-27" ;
        sl:tag           tag:nlp_microsoft , tag:neural_models_for_information_retrieval , tag:knowledge_distillation , tag:bert , tag:arxiv_doc .

tag:text_ranking  a     sl:Tag ;
        skos:prefLabel  "Text Ranking" .

tag:tree_structure  a   sl:Tag ;
        skos:prefLabel  "Tree structure" .

tag:not_encoding_knowledge_in_language_model
        a               sl:Tag ;
        skos:prefLabel  "Not Encoding Factual Knowledge in Language Model" .

tag:question_answering
        a               sl:Tag ;
        skos:prefLabel  "Question Answering" .

<http://www.semanlink.net/doc/2020/12/2002_08909_realm_retrieval_a>
        dc:title         "[2002.08909] REALM: Retrieval-Augmented Language Model Pre-Training" ;
        sl:comment       "**Augment language model pre-training with a retriever module**, which\r\nis trained using the masked language modeling objective.\r\n\r\n> To capture knowledge in a more modular and interpretable way, we augment language model pre-training with a latent knowledge retriever, which allows the model to retrieve and attend over documents from a large corpus such as Wikipedia, used during pre-training, fine-tuning and inference. **For the first time, we show how to pre-train such a knowledge retriever in an unsupervised manner**, using masked language modeling as the learning signal and backpropagating through a retrieval step that considers millions of documents\r\n\r\nHum, #TODO: parallel to be drawn with techniques in [KG-augmented Language Models](tag:knowledge_graph_augmented_language_models) which focus \"on the problem of capturing declarative knowledge in the learned parameters of a language model.\"\r\n\r\n[Google AI Blog Post](doc:2020/08/google_ai_blog_realm_integrat)\r\n\r\n[Summary](https://joeddav.github.io/blog/2020/03/03/REALM.html) for the [Hugging Face awesome-papers reading group](doc:2021/03/huggingface_awesome_papers_pap)" ;
        sl:creationDate  "2020-12-12" ;
        sl:tag           tag:retrieval_augmented_lm , tag:retriever_reader , tag:not_encoding_knowledge_in_language_model , tag:knowledge_augmented_language_models , tag:nlp_google , tag:arxiv_doc , tag:neural_models_for_information_retrieval , tag:realm .

tag:colbert  a          sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "ColBERT" .

tag:nlp_stanford  a     sl:Tag ;
        skos:prefLabel  "NLP@Stanford" .

tag:nlp_microsoft  a    sl:Tag ;
        skos:prefLabel  "NLP@Microsoft" .

tag:nlp_companies  a    sl:Tag ;
        skos:prefLabel  "NLP: companies" .

<http://www.semanlink.net/doc/2022/11/pretrained_transformer_language>
        dc:title         "Pretrained Transformer Language Models for Search | Vespa Blog" ;
        sl:creationDate  "2022-11-04" ;
        sl:tag           tag:vespa , tag:neural_models_for_information_retrieval , tag:language_model .

<http://www.semanlink.net/doc/2023/03/2104_07186_coil_revisit_exac>
        dc:title         "[2104.07186] COIL: Revisit Exact Lexical Match in Information Retrieval with Contextualized Inverted List" ;
        sl:creationDate  "2023-03-08" ;
        sl:tag           tag:neural_models_for_information_retrieval , tag:information_retrieval , tag:discute_avec_raphael , tag:arxiv_doc .

<http://www.semanlink.net/doc/2022/03/naver_labs_europe_nils_reim>
        dc:title         "NAVER LABS Europe : \"@Nils_Reimers of @huggingface on 'Unsupervised domain adaptation for neural search'\"" ;
        sl:creationDate  "2022-03-09" ;
        sl:tag           tag:unsupervised_domain_adaptation_nlp , tag:nils_reimers , tag:neural_models_for_information_retrieval , tag:j_y_etais , tag:gpl_generative_pseudo_labeling .

tag:nasa  a             sl:Tag ;
        skos:prefLabel  "NASA" .

tag:domain_adaptation
        a               sl:Tag ;
        skos:prefLabel  "Domain adaptation" .

tag:zero_shot  a        sl:Tag ;
        skos:prefLabel  "Zero shot" .

tag:nlp_ens  a          sl:Tag ;
        skos:prefLabel  "NLP@ENS" .

<http://www.semanlink.net/doc/2022/05/max_irwin_sur_twitter_instan>
        dc:title         "Max Irwin sur Twitter : \"Instant Neural Search for your website! ...\"" ;
        sl:creationDate  "2022-05-19" ;
        sl:tag           tag:tweet , tag:nlp_tools , tag:neural_models_for_information_retrieval .

tag:learned_index_structures
        a               sl:Tag ;
        skos:prefLabel  "Learned Index Structures" .

tag:ikuya_yamada  a     sl:Tag ;
        skos:prefLabel  "Ikuya Yamada" .

tag:slides  a           sl:Tag ;
        skos:prefLabel  "Slides" .

tag:information_retrieval
        a               sl:Tag ;
        skos:prefLabel  "Information retrieval" .

<http://www.semanlink.net/doc/2023/11/raphaelsty_neural_cherche_neur>
        dc:title         "raphaelsty/neural-cherche: Neural Search" ;
        sl:comment       "> a library to fine-tune neural search models such as Splade, ColBERT, and SparseEmbed on a specific dataset" ;
        sl:creationDate  "2023-11-17" ;
        sl:tag           tag:raphaelsty , tag:splade , tag:neural_cherche , tag:colbert , tag:github_project , tag:neural_models_for_information_retrieval , tag:cherche_raphael .

tag:bert  a             sl:Tag ;
        skos:prefLabel  "BERT" .

tag:open_domain_question_answering
        a               sl:Tag ;
        skos:prefLabel  "Open Domain Question Answering" .

tag:bhaskar_mitra  a    sl:Tag ;
        skos:prefLabel  "Bhaskar Mitra" .

<http://www.semanlink.net/doc/2021/07/2010_06467_pretrained_transfo>
        dc:title         "[2010.06467] Pretrained Transformers for Text Ranking: BERT and Beyond" ;
        sl:comment       "a 155 pages paper!\r\n\r\n- [Ranking metrics](tag:ranking_metrics) p 23\r\n- keyword search p 35\r\n> most current applications of transformers for text ranking rely on keyword search in a multi-stage\r\nranking architecture, which is the focus of Section 3.\r\n- 3.3 From Passage to Document Ranking p 52 [#Long documents](tag:nlp_long_documents)" ;
        sl:creationDate  "2021-07-09" ;
        sl:tag           tag:attention_is_all_you_need , tag:text_ranking , tag:survey , tag:neural_models_for_information_retrieval , tag:nlp_long_documents , tag:arxiv_doc .

tag:retrieval_augmented_lm
        a               sl:Tag ;
        skos:prefLabel  "Retrieval augmented LM" .

tag:dense_retriever  a  sl:Tag ;
        skos:prefLabel  "Dense retriever" .

tag:neural_cherche  a   sl:Tag ;
        skos:prefLabel  "Neural-Cherche" .

tag:unsupervised_domain_adaptation_nlp
        a               sl:Tag ;
        skos:prefLabel  "Unsupervised Domain Adaptation (NLP)" .

<http://www.semanlink.net/doc/2022/11/shubham_saboo_sur_twitter_bu>
        dc:title         "Shubham Saboo sur Twitter : \"Build a Google-like search for your data in 30 mins...\" (using LLMs)" ;
        sl:creationDate  "2022-11-07" ;
        sl:tag           tag:tweet , tag:neural_models_for_information_retrieval , tag:cohere .

<http://www.semanlink.net/doc/2023/04/2009_13013_sparta_efficient_>
        dc:title         "[2009.13013] SPARTA: Efficient Open-Domain Question Answering via Sparse Transformer Matching Retrieval" ;
        sl:comment       "> SPARTA, a novel neural retrieval method... Unlike many neural ranking methods that use dense vector nearest neighbor search, SPARTA learns a sparse representation that can be efficiently implemented as an Inverted Index. The resulting representation enables scalable neural retrieval that does not require expensive approximate vector search and leads to better performance than its dense counterpart." ;
        sl:creationDate  "2023-04-06" ;
        sl:tag           tag:sparse_transformers , tag:open_domain_question_answering , tag:neural_models_for_information_retrieval , tag:nearest_neighbor_search , tag:arxiv_doc .

tag:arxiv_doc  a        sl:Tag ;
        skos:prefLabel  "Arxiv Doc" .

tag:dense_passage_retrieval
        a               sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Dense Passage Retrieval" .

tag:vespa  a            sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Vespa" .

tag:embeddings  a       sl:Tag ;
        skos:prefLabel  "Embeddings" .

<http://www.semanlink.net/doc/2021/09/haystack>
        dc:title         "Haystack (deepset)" ;
        sl:comment       "[deepset](doc:2021/09/nlp_solutions_to_streamline_neu)\r\n\r\n> Haystack is an **open-source framework** for building search systems that work intelligently over large document collections. Recent advances in NLP have enabled the application of question answering, retrieval and summarization to real world settings and Haystack is designed to be the bridge between research and industry." ;
        sl:creationDate  "2021-09-20" ;
        sl:tag           tag:question_answering , tag:nlp_tools , tag:neural_models_for_information_retrieval , tag:haystack , tag:discute_avec_raphael , tag:cognitive_search .

tag:graph  a            sl:Tag ;
        skos:prefLabel  "Graph" .

tag:attention_is_all_you_need
        a               sl:Tag ;
        skos:prefLabel  "Transformers" .

<http://www.semanlink.net/doc/2023/04/2304_01982_rethinking_the_rol>
        dc:title         "[2304.01982] Rethinking the Role of Token Retrieval in Multi-Vector Retrieval" ;
        sl:comment       "> Multi-vector retrievers like [ColBERT](tag:colbert) are powerful, but they come at the cost of complicated inference. In this paper, we ask: \"can token retrieval alone achieve great performance in multi-vector retrieval?\" [tweet](https://twitter.com/leejnhk/status/1643632578824396805?s=20)\r\n\r\n> The key insight of XTR is that the\r\ntoken-retrieval in multi-vector models should be **trained to retrieve the most salient and informative\r\ndocument tokens**, so that the score between a query and document can be computed using only the\r\nretrieved information, just like how single-vector retrieval models work\r\n\r\n> This is an *amazing* way to re-engineer the scoring mechanism of late interaction / ColBERT retrievers! [src: ColBERT's author Omar Khattab](https://twitter.com/lateinteraction/status/1643439889902637056?s=20)\r\n\r\n- scoring using only retrieved document terms\r\n- imputing missing token scores using their upper bound" ;
        sl:creationDate  "2023-04-05" ;
        sl:tag           tag:nlp_google , tag:neural_models_for_information_retrieval , tag:multi_vector_retrieval , tag:colbert , tag:arxiv_doc .

tag:okapi_bm25  a       sl:Tag ;
        skos:prefLabel  "BM25" .

<http://www.semanlink.net/doc/2020/12/2012_04584_distilling_knowled>
        dc:title         "[2012.04584] Distilling Knowledge from Reader to Retriever for Question Answering" ;
        sl:comment       "> a method to train an information retrieval module for downstream tasks, **without using pairs of queries and documents as annotations**.\r\n\r\nUses two models (standard pipeline for open-domain QA):\r\n\r\n- the first one retrieves documents from a large source of knowledge (the retriever)\r\n- the second one processes the support documents to solve the task (the reader).\r\n\r\n> First the retriever selects support passages in a large knowledge\r\nsource. Then these passages are processed by the reader, along with the question, to generate an\r\nanswer\r\n\r\nInspired by knowledge distillation: the reader model is the teacher and the retriever is the student.\r\n\r\n> More precisely, we use a sequence-to-sequence model as the reader, and use\r\nthe attention activations over the input documents as synthetic labels to train the retriever. \r\n> (**train the retriever by learning to approximate the attention score of the reader**)\r\n\r\nRefers to:\r\n\r\n- [REALM: Retrieval-Augmented Language Model Pre-Training](doc:2020/12/2002_08909_realm_retrieval_a)\r\n- [Dehghani: Neural Ranking Models with Weak Supervision](doc:?uri=https%3A%2F%2Farxiv.org%2Fabs%2F1704.08803)" ;
        sl:creationDate  "2020-12-11" ;
        sl:tag           tag:retriever_reader , tag:knowledge_augmented_language_models , tag:nlp_ens , tag:arxiv_doc , tag:gautier_izacard , tag:open_domain_question_answering , tag:nlp_facebook , tag:knowledge_distillation , tag:neural_models_for_information_retrieval , tag:question_answering .

tag:weaviate  a         sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Weaviate" .

tag:search  a           sl:Tag ;
        skos:prefLabel  "Search" .

tag:retriever_reader  a  sl:Tag ;
        skos:prefLabel  "Retriever-Reader" .

tag:nlp_low_resource_scenarios
        a               sl:Tag ;
        skos:prefLabel  "Low-Resource NLP" .

<http://www.semanlink.net/doc/2022/03/unsupervised_training_of_retrie>
        dc:title         "Unsupervised Training of Retrievers Using GenQ (The Art of Asking Questions with GenQ) | Pinecone" ;
        sl:creationDate  "2022-03-09" ;
        sl:tag           tag:unsupervised_domain_adaptation_nlp , tag:pinecone , tag:neural_models_for_information_retrieval .

tag:raphaelsty  a       sl:Tag ;
        skos:prefLabel  "Raphaël Sourty" .

tag:hybrid_search  a    sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Hybrid Search" .

<http://www.semanlink.net/doc/2024/02/raphaelsty_neural_tree_tree_ba>
        dc:title         "raphaelsty/neural-tree: Tree-based indexes for neural-search" ;
        sl:comment       "> Are tree-based indexes the counterpart of standard ANN algorithms for token-level embeddings IR models?\r\n\r\n> Neural-Tree creates a tree using hierarchical clustering of documents and then learn embeddings in each node of the tree using paired queries and documents. Additionally, there is the flexibility to input an existing tree structure in JSON format to build the index.\r\n\r\n[Constructing Tree-based Index for Efficient and Effective Dense Retrieval](https://dl.acm.org/doi/10.1145/3539618.3591651)" ;
        sl:creationDate  "2024-02-28" ;
        sl:tag           tag:tree_structure , tag:raphaelsty , tag:neural_models_for_information_retrieval , tag:learned_index_structures , tag:clustering_of_text_documents .

<http://www.semanlink.net/doc/2021/07/nandan_thakur_sur_twitter_i>
        dc:title         "Nandan Thakur sur Twitter : \"@ikuyamada @Nils_Reimers Thanks @ikuyamad...\"" ;
        sl:comment       "Related to [UKPLab/beir: A Heterogeneous Benchmark for Information Retrieval.](doc:2021/07/ukplab_beir_a_heterogeneous_be) and [[2106.00882] Efficient Passage Retrieval with Hashing for Open-domain Question Answering](doc:2021/06/2106_00882_efficient_passage_)" ;
        sl:creationDate  "2021-07-09" ;
        sl:tag           tag:tweet , tag:nils_reimers , tag:neural_models_for_information_retrieval , tag:ikuya_yamada .

tag:benchmark  a        sl:Tag ;
        skos:prefLabel  "Benchmark" .

tag:enterprise_search
        a               sl:Tag ;
        skos:prefLabel  "Enterprise Search" .

<http://www.semanlink.net/doc/2023/02/unlocking_the_power_of_vector_s>
        dc:title         "Unlocking the Power of Vector Search in Enterprise" ;
        sl:comment       "> we've developed a method for fine-tuning\r\nembeddings to the unique language of our clients" ;
        sl:creationDate  "2023-02-17" ;
        sl:tag           tag:nlp_companies , tag:neural_models_for_information_retrieval , tag:enterprise_search .

<http://www.semanlink.net/doc/2021/06/2106_04612_neural_extractive_>
        dc:title         "[2106.04612] Neural Extractive Search" ;
        sl:comment       "how to extend a\r\nsearch paradigm we call “**extractive search**” with\r\nneural similarity techniques.\r\n\r\n> some information needs require extracting\r\nand aggregating sub-sentence information\r\n(words, phrases, or entities) from multiple documents\r\n(e.g. a list of all the risk factors for a specific\r\ndisease and their number of mentions, or a comprehensive\r\ntable of startups and CEOs).\r\n\r\n> extractive search combines\r\ndocument selection with information extraction. **The query is extended with capture slots**:\r\nthese are **search terms that act as variables, whose\r\nvalues should be extracted**.\r\n> The user\r\nis then presented with the matched documents, each\r\nannotated with the corresponding captured spans,\r\nas well as aggregate information over the captured\r\nspans\r\n\r\nConclusion : \r\n\r\n> We presented a system for neural extractive search.\r\nWhile we found our system to be useful for scientific\r\nsearch, it also has clear limitations and areas\r\nfor improvement, both in terms of accuracy (only\r\n72.2% of the returned results are relevant, both the\r\nalignment and similarity models generalize well to\r\nsome relations but not to others), and in terms of\r\nscale\r\n\r\n[Video of demo](https://www.youtube.com/watch?v=TtqWi2GgB5A&t=1832s)" ;
        sl:creationDate  "2021-06-23" ;
        sl:tag           tag:yoav_goldberg , tag:search , tag:neural_models_for_information_retrieval , tag:cognitive_search , tag:arxiv_doc , tag:allen_institute_for_ai_a2i .

tag:nils_reimers  a     sl:Tag ;
        skos:prefLabel  "Nils Reimers" .

tag:unsupervised_machine_learning
        a               sl:Tag ;
        skos:prefLabel  "Unsupervised machine learning" .

tag:nlp_datasets  a     sl:Tag ;
        skos:prefLabel  "NLP datasets" .

<http://www.semanlink.net/doc/2021/09/nlp_solutions_to_streamline_neu>
        dc:title         "Build NLP features into your product | deepset" ;
        sl:creationDate  "2021-09-20" ;
        sl:tag           tag:question_answering , tag:nlp_tools , tag:nlp_companies , tag:neural_models_for_information_retrieval , tag:haystack , tag:discussed_with_ns .

<http://www.semanlink.net/doc/2023/02/chau_tran_sur_twitter_some_>
        dc:title         "Chau Tran sur Twitter : \"Some \"in the trenches\" learnings from integrating vector search into an enterprise search system...\"" ;
        sl:comment       "Blog post: [Unlocking the Power of Vector Search in Enterprise](doc:2023/02/unlocking_the_power_of_vector_s)\r\n\r\n> 1. As of Feb 2023, open source text embedding models on \r\n@huggingface (E5-large, Instructor-XL, and MPNet)\r\n are > to other commercial providers\r\n> 2. on out-of-domain data (enterprise search being an extreme case of this)... finetuning embedding models extremely helpful\r\n> 3. Vector search, while helpful, is not the whole story! We still need traditional keyword search and personalization " ;
        sl:creationDate  "2023-02-17" ;
        sl:tag           tag:tweet , tag:neural_models_for_information_retrieval , tag:enterprise_search , tag:embeddings , tag:domain_adaptation_nlp .

tag:multi_vector_retrieval
        a               sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Multi-Vector Retrieval" .

tag:late_interaction  a  sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Late-Interaction" .

tag:haystack  a         sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Haystack" .

<http://www.semanlink.net/doc/2020/08/google_ai_blog_realm_integrat>
        dc:title         "Google AI Blog: REALM: Integrating Retrieval into Language Representation Models" ;
        sl:comment       "> a new open-source method for language model pre-training that uses a supplemental knowledge retriever that enables it to perform well on knowledge-intensive tasks without billions of parameters.\r\n>\r\n> **The key intuition of REALM is that a retrieval system should improve the model's ability to fill in missing words**\r\n\r\n[Paper:  REALM: Retrieval-Augmented Language Model Pre-Training](doc:2020/12/2002_08909_realm_retrieval_a)" ;
        sl:creationDate  "2020-08-13" ;
        sl:tag           tag:realm , tag:nlp_google , tag:neural_models_for_information_retrieval , tag:knowledge_augmented_language_models .

tag:faiss  a            sl:Tag ;
        skos:prefLabel  "faiss" .

tag:cherche_raphael  a  sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Cherche (Raphaël)" .

tag:realm  a            sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "REALM" .

tag:cohere  a           sl:Tag ;
        skos:prefLabel  "Cohere.ai" .

tag:knowledge_augmented_language_models
        a               sl:Tag ;
        skos:prefLabel  "Knowledge-augmented Language Models" .

<http://www.semanlink.net/doc/2021/12/semi_technologies_weaviate_wea>
        dc:title         "semi-technologies/weaviate: Weaviate is a cloud-native, modular, real-time vector search engine" ;
        sl:comment       "> vector search engine and vector database. Weaviate uses machine learning to vectorize and store data, and to find answers to natural language queries. " ;
        sl:creationDate  "2021-12-05" ;
        sl:tag           tag:weaviate , tag:vector_database , tag:similarity_queries , tag:nlp_tools , tag:neural_models_for_information_retrieval , tag:github_project .

tag:similarity_queries
        a               sl:Tag ;
        skos:prefLabel  "Similarity queries / Vector search" .

tag:retriever  a        sl:Tag ;
        skos:prefLabel  "Retriever" .

tag:beir  a             sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "BEIR" .

tag:pinecone  a         sl:Tag ;
        skos:broader    tag:neural_models_for_information_retrieval ;
        skos:prefLabel  "Pinecone" .

tag:gpl_generative_pseudo_labeling
        a               sl:Tag ;
        skos:prefLabel  "GPL (Generative Pseudo Labeling)" .

tag:vector_retrieval  a  sl:Tag ;
        skos:prefLabel  "Vector Retrieval" .

tag:cognitive_search  a  sl:Tag ;
        skos:prefLabel  "Cognitive Search" .

tag:yoav_goldberg  a    sl:Tag ;
        skos:prefLabel  "Yoav Goldberg" .

tag:nlp_based_ir  a     sl:Tag ;
        skos:prefLabel  "NLP based IR" .

tag:knowledge_distillation
        a               sl:Tag ;
        skos:prefLabel  "Knowledge distillation" .

<http://www.semanlink.net/doc/2022/08/connor_shorten_sur_twitter_w>
        dc:title         "Connor Shorten sur Twitter : \"Wow, incredible to see this from NASA! 🚀 Weaviate's integration of Vector Search with Graph Data and Symbolic Relations is a very interesting combination of technologies!...\"" ;
        sl:creationDate  "2022-08-11" ;
        sl:tag           tag:weaviate , tag:tweet , tag:neural_models_for_information_retrieval , tag:nasa , tag:graph_database_and_nlp , tag:graph .

tag:sparse_transformers
        a               sl:Tag ;
        skos:prefLabel  "Sparse Transformers" .

<http://www.semanlink.net/doc/2022/07/devendra_singh_sachan_sur_twitt>
        dc:title         "Devendra Singh Sachan sur Twitter : \"ART (Autoencoding-based Retriever Training), an unsupervised method to train a dense retriever that only uses questions and a collection of unpaired documents as the training data.\"" ;
        sl:comment       "[Arxiv](doc:2022/07/2206_10658_questions_are_all_)" ;
        sl:creationDate  "2022-07-06" ;
        sl:tag           tag:tweet , tag:retriever , tag:open_domain_question_answering , tag:neural_models_for_information_retrieval .

tag:vector_database  a  sl:Tag ;
        skos:prefLabel  "Vector database" .

tag:survey  a           sl:Tag ;
        skos:prefLabel  "Survey / Review" .

<http://www.semanlink.net/doc/2019/08/neural_models_for_information_r>
        dc:title         "Neural Models for Information Retrieval (2017)" ;
        sl:creationDate  "2019-08-18" ;
        sl:tag           tag:slides , tag:neural_models_for_information_retrieval , tag:bhaskar_mitra .

tag:web_search  a       sl:Tag ;
        skos:prefLabel  "Web search" .

tag:nlp_facebook  a     sl:Tag ;
        skos:prefLabel  "NLP@Facebook" .

<http://www.semanlink.net/doc/2022/09/domain_adaptation_for_dense_ret>
        dc:title         "Domain Adaptation for Dense Retrieval Models - Nils Reimers - ICML 2022 Workshop on Knowledge Retrieval and Language Models" ;
        sl:comment       "Conclusion:\r\n\r\n- The knowledge in models get outdated quickly\r\n    - BERT thinks Barack Obama is the current US president\r\n- Search has a strong focus on recent events\r\n    - Not reflected in any benchmark so far\r\n- Dense models especially sensitive\r\n    - Issue affects all models (Cross-Encoder, Sparse Emb., doc2query)\r\n- How can we efficiently update our models to new domains / new language?\r\n- Current methods are extremely data inefficient \r\n     - How can we update our model from a single example? " ;
        sl:creationDate  "2022-09-02" ;
        sl:tag           tag:slides , tag:nils_reimers , tag:neural_models_for_information_retrieval , tag:domain_adaptation , tag:dense_retriever .

<http://www.semanlink.net/doc/2022/03/2203_06169_laprador_unsuperv>
        dc:title         "[2203.06169] LaPraDoR: Unsupervised Pretrained Dense Retriever for Zero-Shot Text Retrieval" ;
        sl:creationDate  "2022-03-29" ;
        sl:tag           tag:zero_shot , tag:unsupervised_machine_learning , tag:neural_models_for_information_retrieval , tag:arxiv_doc .

tag:clustering_of_text_documents
        a               sl:Tag ;
        skos:prefLabel  "Clustering of text documents" .

<http://www.semanlink.net/doc/2021/09/nils_reimers_sur_twitter_int>
        dc:title         "Nils Reimers sur Twitter : \"Introduction - Neural Search\"" ;
        sl:creationDate  "2021-09-20" ;
        sl:tag           tag:tweet , tag:nils_reimers , tag:neural_models_for_information_retrieval .

<http://www.semanlink.net/doc/2021/10/nils_reimers_sur_twitter_neu>
        dc:title         "Nils Reimers sur Twitter : \"Neural Search for Low Resource Scenarios...\"" ;
        sl:comment       "1. Is low resource actually realistic?\r\n    - No\r\n    - Important research questions:\r\n        - how to learn unsupervised\r\n        - how to exploit structure (ex. title and body)\r\n        - how to learn a concept from a single sentence\r\n2. How good are our benchmarks? \r\n3. Domain-Adaptation for Dense Embeddings\r\n    - first unsupervised training, then supervised\r\n    - TSDAE > ICT > MLM\r\n    - unclear how to adapt an existing model to a new model\r\n\r\n\r\n> TSDAE differs in that the decoder in MLM has access to full-length\r\nword embeddings for every single token. The TSDAE decoder only\r\nhas access to the sentence vector produced by the encoder." ;
        sl:creationDate  "2021-10-27" ;
        sl:tag           tag:tweet , tag:nils_reimers , tag:neural_models_for_information_retrieval , tag:nlp_low_resource_scenarios , tag:domain_adaptation_nlp , tag:benchmark .

<http://www.semanlink.net/doc/2022/09/2008_09093_parade_passage_re>
        dc:title         "[2008.09093] PARADE: Passage Representation Aggregation for Document Reranking" ;
        sl:comment       "recommandé par [Nils Reimers](tag:nils_reimers)" ;
        sl:creationDate  "2022-09-21" ;
        sl:tag           tag:neural_models_for_information_retrieval , tag:nlp_long_documents , tag:arxiv_doc .

tag:language_model  a   sl:Tag ;
        skos:prefLabel  "Language Model" .

tag:discute_avec_raphael
        a               sl:Tag ;
        skos:prefLabel  "Discuté avec Raphaël" .

tag:tweet  a            sl:Tag ;
        skos:prefLabel  "Tweet" .

tag:github_project  a   sl:Tag ;
        skos:prefLabel  "GitHub project" .

tag:nearest_neighbor_search
        a               sl:Tag ;
        skos:prefLabel  "Nearest neighbor search" .

tag:graph_database_and_nlp
        a               sl:Tag ;
        skos:prefLabel  "Graph database and NLP" .
