@prefix rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix sl:    <http://www.semanlink.net/2001/00/semanlink-schema#> .
@prefix skos:  <http://www.w3.org/2004/02/skos/core#> .
@prefix rdfs:  <http://www.w3.org/2000/01/rdf-schema#> .
@prefix tag:   <http://www.semanlink.net/tag/> .
@prefix foaf:  <http://xmlns.com/foaf/0.1/> .
@prefix dc:    <http://purl.org/dc/elements/1.1/> .

tag:long_context  a     sl:Tag ;
        skos:prefLabel  "Long Context" .

tag:retrieval_augmented_generation
        a               sl:Tag ;
        skos:prefLabel  "RAG" .

tag:tweet  a            sl:Tag ;
        skos:prefLabel  "Tweet" .

<http://www.semanlink.net/doc/2024/01/jerry_liu_sur_x_text_splitti>
        dc:title         "LlamaIndex: \"Instead of using a global fixed chunk size for RAG, try splitting based on the semantics of the text\"" ;
        sl:creationDate  "2024-01-13" ;
        sl:tag           tag:tweet , tag:rag_chunking , tag:llamaindex , tag:jerry_liu .

tag:nils_reimers  a     sl:Tag ;
        skos:prefLabel  "Nils Reimers" .

tag:rag_chunking  a       sl:Tag ;
        rdfs:isDefinedBy  tag:rag_chunking.n3 ;
        skos:broader      tag:retrieval_augmented_generation ;
        skos:prefLabel    "RAG: chunking" ;
        foaf:page         tag:rag_chunking.html .

tag:llm  a              sl:Tag ;
        skos:prefLabel  "LLM" .

<http://www.semanlink.net/doc/2024/09/rohan_paul_sur_x_very_intere>
        dc:title         "Rohan Paul sur X : \"Late Chunking: Balancing Precision and Cost in Long Context Retrieval From @weaviate_io blog ...\"" ;
        sl:creationDate  "2024-09-07" ;
        sl:tag           tag:weaviate , tag:tweet , tag:rag_chunking , tag:long_context .

tag:text_embeddings  a  sl:Tag ;
        skos:prefLabel  "Text Embeddings" .

<http://www.semanlink.net/doc/2023/08/jerry_liu_sur_x_this_might_b>
        dc:title         "Jerry Liu sur X : \"This might be the first time ChatGPT (+@jxnlco) helped us come up with a better retrieval algorithm for RAG...\"" ;
        sl:comment       "<https://gpt-index.readthedocs.io/en/latest/examples/retrievers/auto_merging_retriever.html>\r\n\r\n1. Create a hierarchy/graph of “parent chunks” -> smaller chunks. Also link adjacent chunks together.\r\n2. During query-time, first retrieve smaller chunks with embedding similarity.\r\n3. Merge leaves: If any subset of these chunks is a major portion of a larger chunk, return the parent chunk instead." ;
        sl:creationDate  "2023-08-28" ;
        sl:tag           tag:rag_chunking , tag:retrieval_augmented_generation , tag:jerry_liu , tag:chatgpt .

<http://www.semanlink.net/doc/2023/09/rohan_sur_x_we_ve_seen_that_>
        dc:title         "Rohan sur X : \"smaller chunks are good for capturing semantic meaning and larger ones are good for providing better context. @llama_index AutoMergingRetriever takes it one step further...\"" ;
        sl:creationDate  "2023-09-30" ;
        sl:tag           tag:tweet , tag:tree_structure , tag:rag_chunking , tag:llamaindex .

<http://www.semanlink.net/doc/2023/12/jerry_liu_sur_x_naive_chunki>
        dc:title         "Jerry Liu sur X : \"Naive chunking strategies cause poor RAG performance....\"" ;
        sl:comment       "> **“small-to-big” chunking/retrieval strategies**" ;
        sl:creationDate  "2023-12-06" ;
        sl:tag           tag:tweet , tag:rag_chunking , tag:jerry_liu .

<http://www.semanlink.net/doc/2024/08/jina_ai_sur_x_late_chunking_>
        dc:title         "Jina AI sur X : \"Late Chunking...\"" ;
        sl:creationDate  "2024-08-24" ;
        sl:tag           tag:rag_chunking .

<http://www.semanlink.net/doc/2024/03/nils_reimers_sur_x_smlpth_w>
        dc:title         "Nils Reimers sur X : \"Embeddings can store only 1 aspect/topic per embedding well.\"" ;
        sl:comment       "> On Wikipedia, one paragraph typically focuses on one topic. So this gives you a good chunking for Wikipedia" ;
        sl:creationDate  "2024-03-13" ;
        sl:tag           tag:wikipedia , tag:tips , tag:text_embeddings , tag:rag_chunking , tag:nils_reimers .

tag:tree_structure  a   sl:Tag ;
        skos:prefLabel  "Tree structure" .

tag:tips  a             sl:Tag ;
        skos:prefLabel  "Tips" .

<http://www.semanlink.net/doc/2023/07/jerry_liu_sur_twitter_there_>
        dc:title         "Jerry Liu sur Twitter : \"Some critical data considerations that you must take into account to make your LLM application production-ready\"" ;
        sl:comment       ">  Using naive RAG techniques (naive text chunking, simple top-k retrieval -> LLM) is fine for hackathons, but will lead to lots of failure cases.\r\n\r\n[slides](https://docs.google.com/presentation/d/1wTEt3sy7ZHk3rYO3nFYhPZEFrfpG70l6WzY12wIaycE/edit#slide=id.p)\r\n\r\namong the points:\r\n\r\n- Good parser\r\n- Augmenting chunks with context. Eg. keeping page num with chunk allows for inline citation\r\n- Right indexes over your data\r\n- Using LLMs for Automatic Metadata Extraction" ;
        sl:creationDate  "2023-07-23" ;
        sl:tag           tag:tweet , tag:slides , tag:rag_chunking , tag:retrieval_augmented_generation , tag:llm , tag:jerry_liu .

tag:llamaindex  a       sl:Tag ;
        skos:prefLabel  "LlamaIndex" .

tag:slides  a           sl:Tag ;
        skos:prefLabel  "Slides" .

tag:weaviate  a         sl:Tag ;
        skos:prefLabel  "Weaviate" .

<http://www.semanlink.net/doc/2024/03/jerry_liu_sur_x_to_better_au>
        dc:title         "Jerry Liu sur X : \"To better augment LLMs with context, it makes a lot of sense to organize context not just as a flat list of text chunks, but as a hierarchy of high-level to low-level details. RAPTOR...\"" ;
        sl:comment       "> To better augment LLMs with context, it makes a lot of sense to organize context not just as a flat list of text chunks, but as a hierarchy of high-level to low-level details. \r\n>\r\n> RAPTOR is a super simple but neat idea towards this direction. Hierarchically cluster and summarize the text into a tree (the clustering is important, allows semantically related concepts to be grouped together and doesn't purely rely on spatial positioning!). During query-time dynamically retrieve the most relevant context to the question." ;
        sl:creationDate  "2024-03-03" ;
        sl:tag           tag:twitter_thread , tag:raptor , tag:rag_chunking , tag:jerry_liu .

tag:wikipedia  a        sl:Tag ;
        skos:prefLabel  "Wikipedia" .

tag:jerry_liu  a        sl:Tag ;
        skos:prefLabel  "Jerry Liu" .

tag:chatgpt  a          sl:Tag ;
        skos:prefLabel  "ChatGPT" .

tag:raptor  a           sl:Tag ;
        skos:prefLabel  "RAPTOR" .

<http://www.semanlink.net/doc/2023/09/jerry_liu_sur_x_here%E2%80%99s_a_sim>
        dc:title         "Jerry Liu sur X : \"A simple trick to improve retrieval for RAG 💡: Embed “references” to each text chunk instead of the chunk itself (e.g. smaller chunks, summaries)...\"" ;
        sl:creationDate  "2023-09-06" ;
        sl:tag           tag:tweet , tag:rag_chunking , tag:retrieval_augmented_generation , tag:llamaindex , tag:jerry_liu .

tag:twitter_thread  a   sl:Tag ;
        skos:prefLabel  "Twitter thread" .
