<?xml version='1.0' encoding='UTF-8'  ?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/">	<channel rdf:about="http://www.semanlink.net/tag/general_nlp_tasks">		<title>General NLP tasks</title>		<link>http://www.semanlink.net/tag/general_nlp_tasks</link>		<description>Documents tagged with General NLP tasks</description>		<items>			<rdf:Seq>							<rdf:li resource="http://www.semanlink.net/doc/2023/09/maarten_grootendorst_sur_x_i"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/06/scott_condron_sur_twitter_la"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/04/meta_learning_for_keyphrase_ext"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/10/maartengr_keybert_minimal_keyw"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/09/ramsri_goutham_golla_sur_twitte"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/05/2012_12624_learning_dense_rep"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/02/2109_06304_phrase_bert_impro"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/02/nlp_pos_part_of_speech_taggi"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/02/part_of_speech_pos_tag_%7C_depen"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/09/2109_08133_phrase_retrieval_l"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/06/yake_keyword_extraction_from_s"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/05/large_scale_evaluation_of_keyph"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/05/simple_unsupervised_keyphrase_e"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/12/autophrase_automated_phrase_mi"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/12/keyword_extraction_with_bert_%7C_"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/07/dicksontsai_stanford_nlp_local_"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2020/03/ambiversenlu_a_natural_languag"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/08/blackstone_concept_extractor_"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/07/mining_quality_phrases_from_mas"/>				<rdf:li resource="https://dl.acm.org/citation.cfm?id=1321475"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/02/keywords2vec"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/02/jeremy_howard_on_twitter_such"/>				<rdf:li resource="https://tech.goibibo.com/key-topics-extraction-and-contextual-sentiment-of-users-reviews-20e63c0fd7ca"/>				<rdf:li resource="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3168314/"/>				<rdf:li resource="https://www.researchgate.net/publication/321841361_Text_feature_extraction_based_on_deep_learning_a_review"/>				<rdf:li resource="http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0192360"/>				<rdf:li resource="http://www.cis.pku.edu.cn/faculty/system/zhangyan/papers/DMMOOC2017-jiang.pdf"/>				<rdf:li resource="http://acl2014.org/acl2014/P14-1/pdf/P14-1119.pdf"/>				<rdf:li resource="https://arxiv.org/abs/1404.5367"/>				<rdf:li resource="http://aclweb.org/anthology/P14-3006"/>				<rdf:li resource="https://techblog.cdiscount.com/part-speech-tagging-tutorial-keras-deep-learning-library/"/>				<rdf:li resource="https://guillaumegenthial.github.io/sequence-tagging-with-tensorflow.html"/>				<rdf:li resource="https://graphaware.com/neo4j/2017/10/03/efficient-unsupervised-topic-extraction-nlp-neo4j.html"/>				<rdf:li resource="https://www.quora.com/What-is-a-simple-but-detailed-explanation-of-Textrank"/>				<rdf:li resource="https://fr.slideshare.net/andrewkoo/textrank-algorithm"/>				<rdf:li resource="http://textminingonline.com/dive-into-nltk-part-v-using-stanford-text-analysis-tools-in-python"/>				<rdf:li resource="https://github.com/nltk/nltk/wiki/Installing-Third-Party-Software"/>				<rdf:li 
resource="http://www.nltk.org/_modules/nltk/tag/stanford.html"/>				<rdf:li resource="http://rdrpostagger.sourceforge.net/"/>				<rdf:li resource="http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/"/>				<rdf:li resource="http://www.nltk.org/api/nltk.tag.html#module-nltk.tag.stanford"/>				<rdf:li resource="https://nlp.stanford.edu/software/tagger.shtml"/>				<rdf:li resource="https://stackoverflow.com/questions/15388831/what-are-all-possible-pos-tags-of-nltk"/>				<rdf:li resource="https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html"/>				<rdf:li resource="http://radimrehurek.com/gensim/models/phrases.html"/>				<rdf:li resource="https://stackoverflow.com/questions/9663918/how-can-i-tag-and-chunk-french-text-using-nltk-and-python?rq=1"/>				<rdf:li resource="https://www.researchgate.net/post/Are_there_any_efficient_stemming_algorithms_in_addition_to_the_Porter_and_Carry_algorithms"/>				<rdf:li resource="https://www.microsoft.com/en-us/research/publication/a-ranking-approach-to-keyphrase-extraction/"/>				<rdf:li resource="http://sujitpal.blogspot.fr/2013/03/implementing-rake-algorithm-with-nltk.html"/>				<rdf:li resource="http://www.nzdl.org/Kea/index.html"/>				<rdf:li resource="https://github.com/aneesha/RAKE"/>				<rdf:li resource="http://www.hlt.utdallas.edu/~vince/papers/acl14-keyphrase-poster.jpg"/>				<rdf:li resource="http://www.aclweb.org/anthology/P/P14/P14-1119.xhtml"/>				<rdf:li resource="http://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf"/>				<rdf:li resource="http://www.hlt.utdallas.edu/~vince/papers/coling10-keyphrase.pdf"/>				<rdf:li resource="http://bdewilde.github.io/blog/2014/09/23/intro-to-automatic-keyphrase-extraction/"/>				<rdf:li resource="http://blogs.lessthandot.com/index.php/artificial-intelligence/automated-keyword-extraction-tf-idf-rake-and-textrank/"/>				<rdf:li resource="https://rare-technologies.com/text-summarization-with-gensim/"/>				<rdf:li 
resource="https://www.airpair.com/nlp/keyword-extraction-tutorial"/>				<rdf:li resource="http://blog.swayy.co/post/61672584784/an-algorithm-for-generating-automatic-hashtags"/>				<rdf:li resource="https://thetokenizer.com/2013/05/09/efficient-way-to-extract-the-main-topics-of-a-sentence/"/>				<rdf:li resource="https://medium.com/@acrosson/extract-subject-matter-of-documents-using-nlp-e284c1c61824"/>				<rdf:li resource="http://scikit-learn.org/stable/auto_examples/model_selection/grid_search_text_feature_extraction.html#example-model-selection-grid-search-text-feature-extraction-py"/>				<rdf:li resource="http://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html"/>				<rdf:li resource="http://fr.slideshare.net/julienplu/extraction-de-lasemantique"/>				<rdf:li resource="http://tartarus.org/martin/PorterStemmer/"/>			</rdf:Seq>		</items>	</channel>		<item rdf:about="http://www.semanlink.net/doc/2023/09/maarten_grootendorst_sur_x_i">		<title>Maarten Grootendorst sur X : &quot;Introducing KeyLLM. An extension to KeyBERT that can create, extract, and fine-tune keywords using Large Language Models!</title>		<link>http://www.semanlink.net/doc/2023/09/maarten_grootendorst_sur_x_i</link>		<dc:date>2023-09-30T14:26:24Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/06/scott_condron_sur_twitter_la">		<title>Scott Condron sur Twitter : &quot;a tag-based document explorer with &lt;200 lines of python 1. keyword extraction 2. LLM turns keywords into tags 3. tag docs with LLM 4. 
document browsing on a weave board&quot;</title>		<link>http://www.semanlink.net/doc/2023/06/scott_condron_sur_twitter_la</link>		<dc:date>2023-06-29T23:10:18Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/04/meta_learning_for_keyphrase_ext">		<title>Meta-Learning for Keyphrase Extraction | by Sinequa | Medium (2021)</title>		<link>http://www.semanlink.net/doc/2023/04/meta_learning_for_keyphrase_ext</link>		<dc:date>2023-04-07T11:44:19Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/10/maartengr_keybert_minimal_keyw">		<title>MaartenGr/KeyBERT: Minimal keyword extraction with BERT</title>		<link>http://www.semanlink.net/doc/2022/10/maartengr_keybert_minimal_keyw</link>		<dc:date>2022-10-06T14:37:52Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/09/ramsri_goutham_golla_sur_twitte">		<title>Ramsri Goutham Golla sur Twitter : &quot;how you can extract keywords from any text or document using only sentence transformer vector embeddings?&quot;</title>		<link>http://www.semanlink.net/doc/2022/09/ramsri_goutham_golla_sur_twitte</link>		<dc:date>2022-09-06T23:16:54Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/05/2012_12624_learning_dense_rep">		<title>[2012.12624&#93; Learning Dense Representations of Phrases at Scale</title>		<link>http://www.semanlink.net/doc/2022/05/2012_12624_learning_dense_rep</link>		<description>In this work, we show for the first time that we can learn dense representations of phrases alone that achieve much stronger performance in open-domain QA		</description>		<dc:date>2022-05-11T08:53:38Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/02/2109_06304_phrase_bert_impro">		<title>[2109.06304&#93; Phrase-BERT: Improved Phrase Embeddings from BERT with an Application to Corpus Exploration</title>		<link>http://www.semanlink.net/doc/2022/02/2109_06304_phrase_bert_impro</link>		
<dc:date>2022-02-25T17:19:37Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/02/nlp_pos_part_of_speech_taggi">		<title>NLP: POS (Part of speech) Tagging &amp; Chunking | by Suneel Patel | Medium</title>		<link>http://www.semanlink.net/doc/2022/02/nlp_pos_part_of_speech_taggi</link>		<dc:date>2022-02-16T23:01:03Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/02/part_of_speech_pos_tag_%7C_depen">		<title>Part-of-Speech(POS) Tag | Dependency Parsing | Constituency Parsing</title>		<link>http://www.semanlink.net/doc/2022/02/part_of_speech_pos_tag_%7C_depen</link>		<dc:date>2022-02-06T01:23:19Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/09/2109_08133_phrase_retrieval_l">		<title>[2109.08133&#93; Phrase Retrieval Learns Passage Retrieval, Too</title>		<link>http://www.semanlink.net/doc/2021/09/2109_08133_phrase_retrieval_l</link>		<description>[Github&#93;(doc:2021/09/princeton_nlp_densephrases_acl)

&gt; Do we always need sentence vectors for sentence retrieval and passage vectors for passage retrieval? Our EMNLP2021 paper suggests that phrase vectors can serve as a basic building block for &quot;multi-granularity&quot; retrieval! [tweet&#93;(https://twitter.com/leejnhk/status/1441445536515584004)
&gt;
&gt; Phrases can
be directly used as the output for question answering
and slot filling tasks
&gt;
&gt; the **intuition that retrieving phrases
naturally entails retrieving larger text blocks**		</description>		<dc:date>2021-09-30T14:50:09Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/06/yake_keyword_extraction_from_s">		<title>YAKE! Keyword extraction from single documents using multiple local features (2019)</title>		<link>http://www.semanlink.net/doc/2021/06/yake_keyword_extraction_from_s</link>		<dc:date>2021-06-10T00:51:11Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/05/large_scale_evaluation_of_keyph">		<title>Large-Scale Evaluation of Keyphrase Extraction Models (2020)</title>		<link>http://www.semanlink.net/doc/2021/05/large_scale_evaluation_of_keyph</link>		<description>&gt; Results indicate that keyphrase extraction is still an open research question, with state-of-the-art neural-based models still challenged by simple baselines on some datasets

[Github&#93;(https://github.com/ygorg/JCDL_2020_KPE_Eval)		</description>		<dc:date>2021-05-31T11:56:12Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/05/simple_unsupervised_keyphrase_e">		<title>Simple Unsupervised Keyphrase Extraction using Sentence Embeddings - ACL Anthology (2018)</title>		<link>http://www.semanlink.net/doc/2021/05/simple_unsupervised_keyphrase_e</link>		<description>&gt; unsupervised
method to automatically extract keyphrases from
a document, that **only requires
the document itself**
&gt;
&gt; 1. We extract **candidate phrases
from the text, based on part-of-speech sequences**.
More precisely, we keep only those phrases that
consist of zero or more adjectives followed by one
or multiple nouns (Wan and Xiao, 2008).
&gt; 2. We
use sentence embeddings **to embed
both the candidate phrases and the document itself
in the same high-dimensional vector space**
&gt; 3.  We rank the candidate phrases to select
the output keyphrases. In addition we show how to improve the
ranking step, by providing a way to tune the diversity
of the extracted keyphrases.		</description>		<dc:date>2021-05-31T11:47:52Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/12/autophrase_automated_phrase_mi">		<title>AutoPhrase: Automated Phrase Mining from Massive Text Corpora</title>		<link>http://www.semanlink.net/doc/2020/12/autophrase_automated_phrase_mi</link>		<dc:date>2020-12-14T19:15:04Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/12/keyword_extraction_with_bert_%7C_">		<title>Keyword Extraction with BERT | Towards Data Science</title>		<link>http://www.semanlink.net/doc/2020/12/keyword_extraction_with_bert_%7C_</link>		<description>A minimal method for extracting keywords and keyphrases.

[GitHub&#93;(https://github.com/MaartenGr/KeyBERT/)

&gt; uses BERT-embeddings and simple cosine similarity to find the sub-phrases in a document that are the most similar to the document itself.		</description>		<dc:date>2020-12-06T10:07:17Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/07/dicksontsai_stanford_nlp_local_">		<title>dicksontsai/stanford-nlp-local-extension: Chrome extension for sending content to localhost server running Stanford NLP tools.</title>		<link>http://www.semanlink.net/doc/2020/07/dicksontsai_stanford_nlp_local_</link>		<dc:date>2020-07-03T17:44:02Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/03/ambiversenlu_a_natural_languag">		<title>AmbiverseNLU: A Natural Language Understanding suite by Max Planck Institute for Informatics</title>		<link>http://www.semanlink.net/doc/2020/03/ambiversenlu_a_natural_languag</link>		<dc:date>2020-03-13T10:30:41Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/08/blackstone_concept_extractor_">		<title>Blackstone Concept Extractor — ICLR&amp;D</title>		<link>http://www.semanlink.net/doc/2019/08/blackstone_concept_extractor_</link>		<dc:date>2019-08-21T08:35:18Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/07/mining_quality_phrases_from_mas">		<title>Mining Quality Phrases from Massive Text Corpora (2015)</title>		<link>http://www.semanlink.net/doc/2019/07/mining_quality_phrases_from_mas</link>		<description>framework that extracts quality phrases from text corpora integrated with phrasal segmentation.

&gt; The framework requires only limited training but the quality of phrases so generated is close to human judgment. Moreover, the method is scalable: both computation time and required space grow linearly as corpus size increases

[Related blog post&#93;(https://medium.com/@SherlockHumus/mining-quality-phrases-from-not-so-massive-text-corpora-part-i-b20b8336520a)

Used in [this Entity Linking method&#93;(/doc/?uri=https%3A%2F%2Farxiv.org%2Fabs%2F1807.06036)		</description>		<dc:date>2019-07-15T13:02:36Z</dc:date>	</item>	<item rdf:about="https://dl.acm.org/citation.cfm?id=1321475">		<title>Wikify!: linking documents to encyclopedic knowledge (2007)</title>		<link>https://dl.acm.org/citation.cfm?id=1321475</link>		<description>use of Wikipedia as a resource for automatic keyword extraction and word sense disambiguation		</description>		<dc:date>2019-04-16T22:51:58Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/02/keywords2vec">		<title>Keywords2vec</title>		<link>http://www.semanlink.net/doc/2019/02/keywords2vec</link>		<description>To generate a word2vec model, but using keywords instead of one word. Tokenize on stopwords + non word characters

(This reminds me of the author of [FlashText algorithm&#93;(tag:flashtext_algorithm.html) saying he had developed it to create word2vec models)		</description>		<dc:date>2019-02-09T01:43:55Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/02/jeremy_howard_on_twitter_such">		<title>Jeremy Howard on Twitter: &quot;Such a ridiculously simple idea couldn&apos;t possibly work, could it? Or... could it? &quot;</title>		<link>http://www.semanlink.net/doc/2019/02/jeremy_howard_on_twitter_such</link>		<description>&gt; We found that tokenizing using stopwords + non word characters was really useful for &quot;finding&quot; the keywords

[keywords2vec&#93;(/doc/2019/02/keywords2vec)		</description>		<dc:date>2019-02-09T01:42:55Z</dc:date>	</item>	<item rdf:about="https://tech.goibibo.com/key-topics-extraction-and-contextual-sentiment-of-users-reviews-20e63c0fd7ca">		<title>Key topics extraction and contextual sentiment of users’ reviews</title>		<link>https://tech.goibibo.com/key-topics-extraction-and-contextual-sentiment-of-users-reviews-20e63c0fd7ca</link>		<dc:date>2018-09-18T15:05:58Z</dc:date>	</item>	<item rdf:about="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3168314/">		<title>Using machine learning for concept extraction on clinical documents from multiple data sources (2011)</title>		<link>https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3168314/</link>		<dc:date>2018-08-13T17:46:50Z</dc:date>	</item>	<item rdf:about="https://www.researchgate.net/publication/321841361_Text_feature_extraction_based_on_deep_learning_a_review">		<title>Text feature extraction based on deep learning: a review (2017)</title>		<link>https://www.researchgate.net/publication/321841361_Text_feature_extraction_based_on_deep_learning_a_review</link>		<description>outlines the common methods used in
text feature extraction first, and then expands frequently used deep learning methods in text feature extraction and
its applications, and forecasts the application of deep learning in feature extraction		</description>		<dc:date>2018-08-13T14:21:24Z</dc:date>	</item>	<item rdf:about="http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0192360">		<title>Comparing deep learning and concept extraction based methods for patient phenotyping from clinical narratives (2018)</title>		<link>http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0192360</link>		<description>&gt; A CNN for NLP learns which combinations of adjacent words are associated with a given concept.
		</description>		<dc:date>2018-08-12T20:11:48Z</dc:date>	</item>	<item rdf:about="http://www.cis.pku.edu.cn/faculty/system/zhangyan/papers/DMMOOC2017-jiang.pdf">		<title>A Framework for Semi supervised Concept Extraction from MOOC content (2017)</title>		<link>http://www.cis.pku.edu.cn/faculty/system/zhangyan/papers/DMMOOC2017-jiang.pdf</link>		<dc:date>2018-08-12T18:29:53Z</dc:date>	</item>	<item rdf:about="http://acl2014.org/acl2014/P14-1/pdf/P14-1119.pdf">		<title>Automatic Keyphrase Extraction: A Survey of the State of the Art (2014)</title>		<link>http://acl2014.org/acl2014/P14-1/pdf/P14-1119.pdf</link>		<description>[same author&#93;(/doc/?uri=http%3A%2F%2Fwww.hlt.utdallas.edu%2F%7Evince%2Fpapers%2Fcoling10-keyphrase.pdf)		</description>		<dc:date>2018-08-10T10:51:50Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1404.5367">		<title>[1404.5367&#93; Lexicon Infused Phrase Embeddings for Named Entity Resolution</title>		<link>https://arxiv.org/abs/1404.5367</link>		<description>Employs lexicons as part of the word embedding training: 

&gt; The skip-gram model can be trained to
predict not only neighboring words but also lexicon
membership of the central word (or phrase).

Quickly demonstrates how we can plug phrase embeddings
into an existing log-linear CRF System.

		</description>		<dc:date>2018-05-22T16:22:37Z</dc:date>	</item>	<item rdf:about="http://aclweb.org/anthology/P14-3006">		<title>An Exploration of Embeddings for Generalized Phrases (2014)</title>		<link>http://aclweb.org/anthology/P14-3006</link>		<description>&gt; generalized phrases are part
of the inventory of linguistic units that we should
compute embeddings for and we have shown that
such embeddings are superior to word form embeddings
in a coreference resolution task and standard
paraphrase identification task		</description>		<dc:date>2018-05-12T16:04:39Z</dc:date>	</item>	<item rdf:about="https://techblog.cdiscount.com/part-speech-tagging-tutorial-keras-deep-learning-library/">		<title>Part-of-Speech tagging tutorial with the Keras Deep Learning library - Cdiscount TechBlog</title>		<link>https://techblog.cdiscount.com/part-speech-tagging-tutorial-keras-deep-learning-library/</link>		<dc:date>2018-04-13T10:18:20Z</dc:date>	</item>	<item rdf:about="https://guillaumegenthial.github.io/sequence-tagging-with-tensorflow.html">		<title>Sequence Tagging with Tensorflow</title>		<link>https://guillaumegenthial.github.io/sequence-tagging-with-tensorflow.html</link>		<description>bi-LSTM + CRF with character embeddings for NER and POS.
[linked from here&#93;(http://nlp.town/blog/ner-and-the-road-to-deep-learning/)		</description>		<dc:date>2018-03-05T18:51:35Z</dc:date>	</item>	<item rdf:about="https://graphaware.com/neo4j/2017/10/03/efficient-unsupervised-topic-extraction-nlp-neo4j.html">		<title>Efficient unsupervised keywords extraction using graphs</title>		<link>https://graphaware.com/neo4j/2017/10/03/efficient-unsupervised-topic-extraction-nlp-neo4j.html</link>		<dc:date>2017-10-04T23:01:42Z</dc:date>	</item>	<item rdf:about="https://www.quora.com/What-is-a-simple-but-detailed-explanation-of-Textrank">		<title>What is a simple but detailed explanation of Textrank? - Quora</title>		<link>https://www.quora.com/What-is-a-simple-but-detailed-explanation-of-Textrank</link>		<dc:date>2017-07-12T00:58:03Z</dc:date>	</item>	<item rdf:about="https://fr.slideshare.net/andrewkoo/textrank-algorithm">		<title>How does Textrank work? (slides)</title>		<link>https://fr.slideshare.net/andrewkoo/textrank-algorithm</link>		<dc:date>2017-07-12T00:48:39Z</dc:date>	</item>	<item rdf:about="http://textminingonline.com/dive-into-nltk-part-v-using-stanford-text-analysis-tools-in-python">		<title>Dive Into NLTK, Part V: Using Stanford Text Analysis Tools in Python – Text Mining Online</title>		<link>http://textminingonline.com/dive-into-nltk-part-v-using-stanford-text-analysis-tools-in-python</link>		<description>[en cas de pb&#93;(https://gist.github.com/alvations/e1df0ba227e542955a8a)

**including how to use Java NLP Tools in python**

```
export CLASSPATH=/Users/fps/_fps/DeveloperTools/stanford-postagger-full/stanford-postagger.jar # ATTENTION, stanford-postagger.jar, pas stanford-postagger-3.8.0.jar
export STANFORD_MODELS=/Users/fps/_fps/DeveloperTools/stanford-postagger-full/models
python
```

```
from nltk.tag import StanfordPOSTagger

st = StanfordPOSTagger(&apos;english-bidirectional-distsim.tagger&apos;)
st.tag(&apos;What is the airspeed of an unladen swallow ?&apos;.split())

st = StanfordPOSTagger(&apos;french.tagger&apos;)
st.tag(&apos;Les plats servis sont toujours les mêmes et la qualité des plats est en nette baisse&apos;.split())

```

[(&apos;Les&apos;, &apos;DET&apos;), (&apos;plats&apos;, &apos;NOUN&apos;), (&apos;servis&apos;, &apos;ADJ&apos;), (&apos;sont&apos;, &apos;VERB&apos;), (&apos;toujours&apos;, &apos;ADV&apos;), (&apos;les&apos;, &apos;DET&apos;), (&apos;mêmes&apos;, &apos;ADJ&apos;), (&apos;et&apos;, &apos;CONJ&apos;), (&apos;la&apos;, &apos;DET&apos;), (&apos;qualité&apos;, &apos;NOUN&apos;), (&apos;des&apos;, &apos;DET&apos;), (&apos;plats&apos;, &apos;NOUN&apos;), (&apos;est&apos;, &apos;VERB&apos;), (&apos;en&apos;, &apos;ADP&apos;), (&apos;nette&apos;, &apos;ADJ&apos;), (&apos;baisse&apos;, &apos;NOUN&apos;)&#93;


		</description>		<dc:date>2017-07-11T18:16:16Z</dc:date>	</item>	<item rdf:about="https://github.com/nltk/nltk/wiki/Installing-Third-Party-Software">		<title>NLTK: Installing Third Party Software · nltk Wiki</title>		<link>https://github.com/nltk/nltk/wiki/Installing-Third-Party-Software</link>		<dc:date>2017-07-11T18:14:58Z</dc:date>	</item>	<item rdf:about="http://www.nltk.org/_modules/nltk/tag/stanford.html">		<title>Source code for nltk.tag.stanford — NLTK documentation</title>		<link>http://www.nltk.org/_modules/nltk/tag/stanford.html</link>		<dc:date>2017-07-11T16:13:00Z</dc:date>	</item>	<item rdf:about="http://rdrpostagger.sourceforge.net/">		<title>RDRPOSTagger: A Rule-based Part-of-Speech and Morphological Tagging Toolkit</title>		<link>http://rdrpostagger.sourceforge.net/</link>		<description>approach to automatically construct tagging rules in the form of a binary tree. Python and java		</description>		<dc:date>2017-07-11T15:46:46Z</dc:date>	</item>	<item rdf:about="http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/">		<title>TreeTagger - a part-of-speech tagger for many languages</title>		<link>http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/</link>		<dc:date>2017-07-11T15:44:58Z</dc:date>	</item>	<item rdf:about="http://www.nltk.org/api/nltk.tag.html#module-nltk.tag.stanford">		<title>nltk.tag.stanford module  — NLTK documentation</title>		<link>http://www.nltk.org/api/nltk.tag.html#module-nltk.tag.stanford</link>		<description>A module for interfacing with the Stanford taggers.		
</description>		<dc:date>2017-07-11T15:43:03Z</dc:date>	</item>	<item rdf:about="https://nlp.stanford.edu/software/tagger.shtml">		<title>Stanford Log-linear Part-Of-Speech Tagger</title>		<link>https://nlp.stanford.edu/software/tagger.shtml</link>		<dc:date>2017-07-11T15:25:58Z</dc:date>	</item>	<item rdf:about="https://stackoverflow.com/questions/15388831/what-are-all-possible-pos-tags-of-nltk">		<title>What are all possible pos tags of NLTK? - Stack Overflow</title>		<link>https://stackoverflow.com/questions/15388831/what-are-all-possible-pos-tags-of-nltk</link>		<dc:date>2017-07-11T14:50:14Z</dc:date>	</item>	<item rdf:about="https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html">		<title>Penn Treebank P.O.S. Tags</title>		<link>https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html</link>		<description>Alphabetical list of part-of-speech tags used in the Penn Treebank Project		</description>		<dc:date>2017-07-11T14:48:26Z</dc:date>	</item>	<item rdf:about="http://radimrehurek.com/gensim/models/phrases.html">		<title>gensim: models.phrases – Phrase (collocation) detection</title>		<link>http://radimrehurek.com/gensim/models/phrases.html</link>		<description>Automatically detect common phrases – aka multi-word expressions, word n-gram collocations – from a stream of sentences.

[see also&#93;(http://www.markhneedham.com/blog/2015/02/12/pythongensim-creating-bigrams-over-how-i-met-your-mother-transcripts/#disqus_thread)		</description>		<dc:date>2017-07-10T19:05:37Z</dc:date>	</item>	<item rdf:about="https://stackoverflow.com/questions/9663918/how-can-i-tag-and-chunk-french-text-using-nltk-and-python?rq=1">		<title>How can I tag and chunk French text using NLTK and Python? - Stack Overflow</title>		<link>https://stackoverflow.com/questions/9663918/how-can-i-tag-and-chunk-french-text-using-nltk-and-python?rq=1</link>		<dc:date>2017-06-29T11:54:27Z</dc:date>	</item>	<item rdf:about="https://www.researchgate.net/post/Are_there_any_efficient_stemming_algorithms_in_addition_to_the_Porter_and_Carry_algorithms">		<title>Are there any efficient stemming algorithms in addition to the Porter and Carry algorithms?</title>		<link>https://www.researchgate.net/post/Are_there_any_efficient_stemming_algorithms_in_addition_to_the_Porter_and_Carry_algorithms</link>		<dc:date>2017-06-28T16:57:04Z</dc:date>	</item>	<item rdf:about="https://www.microsoft.com/en-us/research/publication/a-ranking-approach-to-keyphrase-extraction/">		<title>A Ranking Approach to Keyphrase Extraction - Microsoft Research (2009)</title>		<link>https://www.microsoft.com/en-us/research/publication/a-ranking-approach-to-keyphrase-extraction/</link>		<description>Previously, automatic keyphrase extraction was formalized as classification and learning methods for classification were utilized. This paper points out that it is more essential to **cast the keyphrase extraction problem as ranking** and employ a **learning to rank** method to perform the task. As example, it employs Ranking SVM, a state-of-art method of learning to rank, in keyphrase extraction
		</description>		<dc:date>2017-06-27T12:47:09Z</dc:date>	</item>	<item rdf:about="http://sujitpal.blogspot.fr/2013/03/implementing-rake-algorithm-with-nltk.html">		<title>Salmon Run: Implementing the RAKE Algorithm with NLTK</title>		<link>http://sujitpal.blogspot.fr/2013/03/implementing-rake-algorithm-with-nltk.html</link>		<dc:date>2017-06-26T14:56:18Z</dc:date>	</item>	<item rdf:about="http://www.nzdl.org/Kea/index.html">		<title>Kea (Keyphrase Extraction Algorithm)</title>		<link>http://www.nzdl.org/Kea/index.html</link>		<description>can be used for free indexing or for indexing with a controlled vocabulary. Java implementation
		</description>		<dc:date>2017-06-26T14:52:19Z</dc:date>	</item>	<item rdf:about="https://github.com/aneesha/RAKE">		<title>RAKE: A python implementation of the Rapid Automatic Keyword Extraction</title>		<link>https://github.com/aneesha/RAKE</link>		<dc:date>2017-06-26T14:43:49Z</dc:date>	</item>	<item rdf:about="http://www.hlt.utdallas.edu/~vince/papers/acl14-keyphrase-poster.jpg">		<title>Automatic Keyphrase Extraction (Poster): A Survey of the State of the Art (2014)</title>		<link>http://www.hlt.utdallas.edu/~vince/papers/acl14-keyphrase-poster.jpg</link>		<dc:date>2017-06-20T14:04:04Z</dc:date>	</item>	<item rdf:about="http://www.aclweb.org/anthology/P/P14/P14-1119.xhtml">		<title>Automatic Keyphrase Extraction: A Survey of the State of the Art (2014)</title>		<link>http://www.aclweb.org/anthology/P/P14/P14-1119.xhtml</link>		<dc:date>2017-06-20T14:01:17Z</dc:date>	</item>	<item rdf:about="http://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf">		<title>TextRank: Bringing Order into Texts (2004)</title>		<link>http://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf</link>		<description>paper, description of the TextRank algorithm		</description>		<dc:date>2017-06-14T01:16:22Z</dc:date>	</item>	<item rdf:about="http://www.hlt.utdallas.edu/~vince/papers/coling10-keyphrase.pdf">		<title>Conundrums in Unsupervised Keyphrase Extraction: Making Sense of the State-of-the-Art (2010)</title>		<link>http://www.hlt.utdallas.edu/~vince/papers/coling10-keyphrase.pdf</link>		<description>evaluation and analysis of Unsupervised Keyphrase Extraction algorithms on a variety of standard evaluation dataset.

Author Kazi Saidul Hasan provides C++ [implementations of the discussed algos&#93;(http://www.hlt.utdallas.edu/~saidul/code.html) (Tf-Idf, TextRank, SingleRank, ExpandRank)

[by same author&#93;(/doc/?uri=http%3A%2F%2Facl2014.org%2Facl2014%2FP14-1%2Fpdf%2FP14-1119.pdf)



		</description>		<dc:date>2017-06-14T00:51:23Z</dc:date>	</item>	<item rdf:about="http://bdewilde.github.io/blog/2014/09/23/intro-to-automatic-keyphrase-extraction/">		<title>Intro to Automatic Keyphrase Extraction</title>		<link>http://bdewilde.github.io/blog/2014/09/23/intro-to-automatic-keyphrase-extraction/</link>		<description>Candidate identification

- remove stop words and punctuation, filtering for words with certain part of speech / POS patterns, using external knowledge bases like WordNet or Wikipedia as references of good/bad keyphrases

Keyphrase selection

- frequency stats (TF-IDF, BM25). Not very good (the best keyphrases aren’t necessarily the most frequent within a document)

- unsupervised
	- graph based ranking:
		- the importance of a candidate is determined by its relatedness to other candidates
			- frequency of co-occurrence
			- semantic relatedness
		- a doc is represented as a graph (nodes = candidates)
		- algos:
			- TextRank
			- DivRank
	- topic-based clustering
- supervised
	- previously seen as a classification problem,
		- KEA
	- now seen as a ranking problem
		- ranking SVM

finally, some sample code in python

		</description>		<dc:date>2017-06-14T00:08:15Z</dc:date>	</item>	<item rdf:about="http://blogs.lessthandot.com/index.php/artificial-intelligence/automated-keyword-extraction-tf-idf-rake-and-textrank/">		<title>Automated Keyword Extraction – TF-IDF, RAKE, and TextRank (Less Than Dot - Blog)</title>		<link>http://blogs.lessthandot.com/index.php/artificial-intelligence/automated-keyword-extraction-tf-idf-rake-and-textrank/</link>		<dc:date>2017-06-13T23:23:35Z</dc:date>	</item>	<item rdf:about="https://rare-technologies.com/text-summarization-with-gensim/">		<title>Text Summarization with Gensim</title>		<link>https://rare-technologies.com/text-summarization-with-gensim/</link>		<dc:date>2017-06-02T01:02:20Z</dc:date>	</item>	<item rdf:about="https://www.airpair.com/nlp/keyword-extraction-tutorial">		<title>NLP keyword extraction tutorial with RAKE and Maui</title>		<link>https://www.airpair.com/nlp/keyword-extraction-tutorial</link>		<description>2 tools:

- simple keyword extraction with a Python library (RAKE)
- Java tool (Maui) that uses a machine-learning technique.

Focus on 2 tasks:

- Extracting the most significant words and phrases that appear in given text
- Identifying a set of topics from a predefined vocabulary that match a given text

Typical steps:

- Candidate selection (extract all possible words, phrases, terms or concepts that can potentially be keywords).
- Properties calculation (for each candidate, properties that indicate that it may be a keyword)
- Scoring and selecting keywords

RAKE: finding multi-word phrases containing frequent words. +: simplicity, ease of use -: limited accuracy, parameter configuration requirement, throws away many valid phrases, doesn’t normalize candidates (no stemming).

Maui: (&quot;Multi-purpose automatic topic indexing&quot;). Based on [Weka&#93;(/semanlink/tag/weka) (GPL, java, maven, github). Compared to RAKE:

- Extract keywords not just from text, but also with a reference to a controlled vocabulary
- Improve the accuracy by training Maui on manually chosen keywords
    - but requires a training model.

Maui can use a controlled vocabulary expressed in SKOS - so I could use it in semanlink!


		</description>		<dc:date>2017-05-24T18:20:50Z</dc:date>	</item>	<item rdf:about="http://blog.swayy.co/post/61672584784/an-algorithm-for-generating-automatic-hashtags">		<title>Swayy | Blog — An algorithm for generating automatic hashtags</title>		<link>http://blog.swayy.co/post/61672584784/an-algorithm-for-generating-automatic-hashtags</link>		<description>pretty basic, use word frequency, stemming and stopwords.
		</description>		<dc:date>2017-05-24T18:07:27Z</dc:date>	</item>	<item rdf:about="https://thetokenizer.com/2013/05/09/efficient-way-to-extract-the-main-topics-of-a-sentence/">		<title>An Efficient Way to Extract the Main Topics from a Sentence | The Tokenizer</title>		<link>https://thetokenizer.com/2013/05/09/efficient-way-to-extract-the-main-topics-of-a-sentence/</link>		<description>based on simple POS tagging (using the Brown corpus), less accurate than the default NLTK tools, but faster
		</description>		<dc:date>2017-05-24T17:58:13Z</dc:date>	</item>	<item rdf:about="https://medium.com/@acrosson/extract-subject-matter-of-documents-using-nlp-e284c1c61824">		<title>Extract Subject Matter of Documents Using NLP – Alexander Crosson – Medium</title>		<link>https://medium.com/@acrosson/extract-subject-matter-of-documents-using-nlp-e284c1c61824</link>		<dc:date>2017-05-24T17:32:42Z</dc:date>	</item>	<item rdf:about="http://scikit-learn.org/stable/auto_examples/model_selection/grid_search_text_feature_extraction.html#example-model-selection-grid-search-text-feature-extraction-py">		<title>Sample pipeline for text feature extraction and evaluation — scikit-learn documentation</title>		<link>http://scikit-learn.org/stable/auto_examples/model_selection/grid_search_text_feature_extraction.html#example-model-selection-grid-search-text-feature-extraction-py</link>		<dc:date>2016-01-12T00:45:15Z</dc:date>	</item>	<item rdf:about="http://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html">		<title>Working With Text Data — scikit-learn documentation</title>		<link>http://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html</link>		<description>scikit-learn tutorial about analysing a collection of labelled text documents :

- load the file contents and the categories
- extract feature vectors (count, tf, tf-idf)
- train a linear model to perform categorization
- use a grid search strategy (to find a good configuration of both the feature extraction components and the classifier)
		</description>		<dc:date>2015-10-21T10:08:08Z</dc:date>	</item>	<item rdf:about="http://fr.slideshare.net/julienplu/extraction-de-lasemantique">		<title>Extraction de la semantique</title>		<link>http://fr.slideshare.net/julienplu/extraction-de-lasemantique</link>		<dc:date>2014-06-18T09:29:38Z</dc:date>	</item>	<item rdf:about="http://tartarus.org/martin/PorterStemmer/">		<title>Porter Stemming Algorithm</title>		<link>http://tartarus.org/martin/PorterStemmer/</link>		<dc:date>2012-03-18T12:29:41Z</dc:date>	</item></rdf:RDF>