@prefix rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix sl:    <http://www.semanlink.net/2001/00/semanlink-schema#> .
@prefix skos:  <http://www.w3.org/2004/02/skos/core#> .
@prefix rdfs:  <http://www.w3.org/2000/01/rdf-schema#> .
@prefix tag:   <http://www.semanlink.net/tag/> .
@prefix foaf:  <http://xmlns.com/foaf/0.1/> .
@prefix dc:    <http://purl.org/dc/elements/1.1/> .

# Tag "Deepseek-r1". Its broader tags are DeepSeek, reasoning models and
# reinforcement learning; the .n3 and .html resources are its definition
# and its page.
tag:deepseek_r1
        rdf:type          sl:Tag ;
        skos:prefLabel    "Deepseek-r1" ;
        skos:broader      tag:deepseek ,
                          tag:reasoning_models ,
                          tag:reinforcement_learning ;
        rdfs:isDefinedBy  tag:deepseek_r1.n3 ;
        foaf:page         tag:deepseek_r1.html .

# Tag for reasoning models; the label also names inference-time scaling.
tag:reasoning_models
        rdf:type        sl:Tag ;
        skos:prefLabel  "Reasoning models (Inference-time scaling)" .

# Tag for math evaluations of reasoning models.
tag:reasoning_models_math_evals
        skos:prefLabel  "Reasoning models: math evals" ;
        rdf:type        sl:Tag .

# Tag for knowledge distillation.
tag:knowledge_distillation
        skos:prefLabel  "Knowledge distillation" ;
        rdf:type        sl:Tag .

# Tag for reinforcement learning.
tag:reinforcement_learning
        skos:prefLabel  "Reinforcement learning" ;
        rdf:type        sl:Tag .

# Document entry titled "deepseek-r1 Model by Deepseek-ai | NVIDIA NIM",
# created 2025-02-24. sl:comment holds a Markdown blockquote; sl:tag lists
# the tags attached to the document.
<http://www.semanlink.net/doc/2025/02/deepseek_r1_model_by_deepseek_a>
        dc:title         "deepseek-r1 Model by Deepseek-ai | NVIDIA NIM" ;
        sl:creationDate  "2025-02-24" ;
        sl:tag           tag:deepseek ,
                         tag:deepseek_r1 ,
                         tag:reasoning_models ,
                         tag:reinforcement_learning ;
        sl:comment       "> DeepSeek-R1 is a first-generation **reasoning model trained using large-scale reinforcement learning** (RL) to solve complex reasoning tasks across domains such as math, code, and language. The model leverages RL to develop reasoning capabilities, which are further enhanced through supervised fine-tuning (SFT) to improve readability and coherence." .

# Tag for chain-of-thought.
tag:chain_of_thought
        rdf:type        sl:Tag ;
        skos:prefLabel  "Chain-of-thought" .

# Document entry titled "diffuse.one/reasoning_update_0", created 2025-02-24.
# sl:comment holds Markdown blockquote text. The ">" in "(> 1,000 tokens)" is
# kept on the same line as the opening parenthesis: placed directly after a
# line break inside a blockquote, it is read as a quote marker and disappears
# from the rendered text.
<http://www.semanlink.net/doc/2025/02/diffuse_one>
        dc:title         "diffuse.one/reasoning_update_0" ;
        sl:comment       "> There is an emerging pattern of fine-tuning a small language model followed by reinforcement learning.\r\n\r\n> A reasoning model is a large language model that is trained to output both a chain of thought and a response. The chain of thought should be relatively long (> 1,000 tokens) and the reasoning should improve its performance relative to a similar-sized non-reasoning models. This is sometimes called \"test-time\" or \"inference-time\" scaling because reasoning models emit more tokens per completion and gain some performance as a result." ;
        sl:creationDate  "2025-02-24" ;
        sl:tag           tag:reinforcement_learning , tag:reasoning_models , tag:reasoning_models_math_evals , tag:knowledge_distillation , tag:deepseek_r1 , tag:chain_of_thought .

# Tag for DeepSeek.
tag:deepseek
        rdf:type        sl:Tag ;
        skos:prefLabel  "DeepSeek" .
